In [1]:
import geopandas as gpd
from shapely.ops import nearest_points
from shapely.geometry import Point
import pandas as pd

In [2]:
# Read the GeoJSON layer from disk into a GeoDataFrame (path passed positionally)
gdf = gpd.read_file('../data/geojson/jardins_parques_urbanos.geojson')

In [None]:
# Preview the first five rows of the layer
gdf.head(5)

In [4]:
# Fail fast unless the layer is in EPSG:4326. A layer with no CRS at all is
# reported through the same ValueError instead of crashing with an
# AttributeError on `None.to_string()`.
if gdf.crs is None or gdf.crs.to_string() != 'EPSG:4326':
    raise ValueError('CRS is not EPSG:4326')

In [5]:
# Load the tabular features that the later cells extend with POI distance columns
df = pd.read_csv(filepath_or_buffer='../data/training/unioned-features-rent.csv')

In [6]:
def add_distance_and_nearest_poi_name(df, feature_name, poi_name, geojson_path, latitude_col='latitude', longitude_col='longitude', poi_crs="EPSG:4326", metric_crs="EPSG:3857"):
    """
    Adds a distance column and the name of the nearest POI to the DataFrame from a GeoJSON file.

    For every row of ``df`` the distance to each individual POI geometry is
    computed in ``metric_crs`` and the closest POI is selected. Because the
    comparison is made against the POI geometries themselves (not against a
    point on their union), the name lookup also works for polygon and line
    layers, where the closest point lies on a boundary rather than being a
    feature of its own.

    Args:
    - df (pd.DataFrame): DataFrame with latitude and longitude columns.
    - feature_name (str): Name of the feature to be added to the DataFrame.
    - poi_name (str): Name of the POI feature in the GeoJSON file.
    - geojson_path (str): Path to the GeoJSON file.
    - latitude_col (str): Name of the latitude column in df.
    - longitude_col (str): Name of the longitude column in df.
    - poi_crs (str): CRS of the POI data (default: EPSG:4326 for WGS84). It is
      assigned to the POI layer when the file declares no CRS, and it is the
      CRS in which the df coordinates are interpreted.
    - metric_crs (str): Projected CRS used for the distance computation
      (default: EPSG:3857). Note that Web Mercator stretches distances by
      roughly 1 / cos(latitude); pass a local projected CRS when true metres
      are required.

    Returns:
    - pd.DataFrame: Copy of df with the added columns
      ``distance_to_nearest_<feature_name>`` (in units of ``metric_crs``) and
      ``nearest_<feature_name>_name``. Rows whose distance cannot be computed
      (e.g. missing coordinates) get NaN / None.

    Raises:
    - KeyError: If ``poi_name`` is not a column of the GeoJSON layer.
    - ValueError: If the GeoJSON layer contains no usable geometry.
    """
    # Copy DataFrame to avoid modifying the original
    new_df = df.copy()

    distance_col = f'distance_to_nearest_{feature_name}'
    name_col = f'nearest_{feature_name}_name'

    # Load GeoJSON as GeoDataFrame
    poi_gdf = gpd.read_file(geojson_path)
    if poi_name not in poi_gdf.columns:
        raise KeyError(f"Column '{poi_name}' not found in {geojson_path}")

    # A layer without CRS metadata cannot be reprojected; treat it as being
    # expressed in poi_crs, which is what the parameter documents.
    if poi_gdf.crs is None:
        poi_gdf = poi_gdf.set_crs(poi_crs)

    # Rows without a usable geometry can never be the nearest POI
    unusable = poi_gdf.geometry.isna() | poi_gdf.geometry.is_empty
    poi_gdf = poi_gdf[~unusable]
    if poi_gdf.empty:
        raise ValueError(f"No usable POI geometries found in {geojson_path}")

    # Reproject to metric CRS for distance calculation; the fresh RangeIndex
    # guarantees that idxmin() below returns a unique label.
    poi_gdf = poi_gdf.to_crs(metric_crs).reset_index(drop=True)
    poi_geometries = poi_gdf.geometry
    poi_names = poi_gdf[poi_name]

    # Build the listing points as a standalone GeoSeries so that no geometry
    # or helper column is ever attached to the returned DataFrame.
    listing_points = gpd.GeoSeries(
        gpd.points_from_xy(new_df[longitude_col], new_df[latitude_col]),
        index=new_df.index,
        crs=poi_crs,
    ).to_crs(metric_crs)

    # Compute nearest distances and POI names
    distances = []
    names = []
    for listing_point in listing_points:
        poi_distances = poi_geometries.distance(listing_point)
        if poi_distances.isna().all():
            # No distance could be computed for this row (e.g. NaN coordinates)
            distances.append(float('nan'))
            names.append(None)
            continue
        nearest_label = poi_distances.idxmin()
        distances.append(poi_distances.loc[nearest_label])
        names.append(poi_names.loc[nearest_label])

    # Assign positionally (same length and order as new_df), so duplicate
    # index labels in df cannot break the assignment.
    new_df[distance_col] = pd.Series(distances, index=new_df.index, dtype='float64')
    new_df[name_col] = names

    return new_df

In [7]:
# Add distance to, and name of, the nearest 'mall' POI for every row
df = add_distance_and_nearest_poi_name(
    df,
    feature_name='mall',
    poi_name='INF_NOME',
    geojson_path='../data/geojson/centros_comerciais.geojson',
)

In [None]:
# Display the frame to inspect the columns added by the preceding cell
df

In [9]:
# Add distance to, and name of, the nearest 'metro' POI for every row
df = add_distance_and_nearest_poi_name(
    df,
    feature_name='metro',
    poi_name='NOME',
    geojson_path='../data/geojson/metro.geojson',
)

In [10]:
# Add distance to, and name of, the nearest 'train' POI for every row
df = add_distance_and_nearest_poi_name(
    df,
    feature_name='train',
    poi_name='NOME',
    geojson_path='../data/geojson/comboios.geojson',
)

In [11]:
# Add distance to, and name of, the nearest 'park' POI for every row
df = add_distance_and_nearest_poi_name(
    df,
    feature_name='park',
    poi_name='INF_NOME',
    geojson_path='../data/geojson/jardins_parques_urbanos.geojson',
)

In [None]:
# Display the frame after the mall, metro, train and park columns have been added
df