In [167]:
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from pandas import json_normalize

from scipy.spatial import cKDTree
from shapely.geometry import shape
from geopy.distance import distance

In [135]:
# EPSG code of WGS 84 (longitude/latitude); assigned to every dataset right after loading.
WGS84=4326
# EPSG code of NAD83 / MTM zone 8, the projected CRS every layer is converted to in prepare_data.
MTM8=32188

def process_request(url, timeout=30):
    """Download ``url`` and return its body decoded as JSON.

    Args:
        url: Address of the resource to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The decoded JSON payload (whatever ``response.json()`` yields).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would hang the notebook on "Run All".
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def json_items_to_geodataframe(http_response):
    """Build a WGS 84 GeoDataFrame from a collection of GeoJSON-like features.

    Args:
        http_response: Features in any form accepted by
            ``GeoDataFrame.from_features`` (in this notebook, the decoded
            payload returned by ``process_request``).

    Returns:
        A GeoDataFrame whose CRS is set to ``epsg:{WGS84}``.
    """
    # from_features is a classmethod: call it on the class instead of on a
    # throwaway empty instance, and let it assign the CRS in the same step.
    return gpd.GeoDataFrame.from_features(http_response, crs=f"epsg:{WGS84}")


def prepare_data(gdf):
    """Return a copy of ``gdf`` with lowercase column names, reprojected to MTM8.

    Args:
        gdf: GeoDataFrame with a CRS already set.

    Returns:
        A new GeoDataFrame in EPSG:``MTM8``; ``gdf`` itself is left untouched.
    """
    # Assigning to ``gdf.columns`` directly would rename the caller's frame as
    # a side effect while the reprojected result is a different object.
    lowered = gdf.copy()
    lowered.columns = lowered.columns.str.lower()
    return lowered.to_crs(epsg=MTM8)


def process_fountains_data(url):
    """Read the CSV located at ``url`` and return it as a pandas DataFrame."""
    return pd.read_csv(url)


def create_fountains_geodataframe(df):
    """Turn the fountain table into a GeoDataFrame of WGS 84 points.

    Args:
        df: DataFrame with ``Longitude`` and ``Latitude`` columns.

    Returns:
        A GeoDataFrame with one point per row (x = Longitude, y = Latitude)
        and its CRS set to ``epsg:{WGS84}``.
    """
    fountain_points = gpd.points_from_xy(df["Longitude"], df["Latitude"])
    located = gpd.GeoDataFrame(df, geometry=fountain_points)
    return located.set_crs(f"epsg:{WGS84}")


def calculate_nearest_fountains_distance(row):
    """Return the distance in meters between a row's point and its closest point.

    Args:
        row: Mapping-like row holding point geometries under ``'geometry'``
            and ``'closest_point'``.

    Returns:
        The ``.meters`` value of the geopy distance between the two points.
    """
    here, nearest = row['geometry'], row['closest_point']
    # Coordinates are handed over as (y, x) pairs; for lon/lat points that is
    # (latitude, longitude).
    separation = distance((here.y, here.x), (nearest.y, nearest.x))
    return separation.meters


def get_nearest_fountains(gdf):
    """Attach to every point the geometry of its nearest other point.

    A ``closest_point`` column is added to ``gdf`` in place, and the same
    frame is returned.

    Nearness is measured by the KD-tree on the raw ``(x, y)`` coordinates.
    NOTE(review): for longitude/latitude input this is a planar comparison in
    degrees and may not always agree with the geodesic nearest neighbour.

    Args:
        gdf: GeoDataFrame of point geometries.

    Returns:
        ``gdf`` with the extra ``closest_point`` column. When fewer than two
        points exist there is no neighbour, and the column is filled with
        ``None``.
    """
    # With fewer than two points a k=2 query has no second neighbour to
    # return, and the resulting out-of-range index would crash the lookup.
    if len(gdf) < 2:
        gdf['closest_point'] = None
        return gdf

    # Build the coordinate array once and reuse it for the tree and the query.
    coords = np.array([(geom.x, geom.y) for geom in gdf.geometry], dtype=float)
    tree = cKDTree(coords)

    # k=2: the best match of each point is the point itself (distance 0), so
    # the second column holds the nearest other point.
    _, neighbor_idx = tree.query(coords, k=2)

    gdf['closest_point'] = gdf.geometry.iloc[neighbor_idx[:, 1]].values
    return gdf

In [136]:
# Download URLs of the datasets used below: six GeoJSON/JSON layers and one CSV (fountains).
rev_url="https://donnees.montreal.ca/dataset/8a4bf03c-dff6-4add-b58b-c38954b0ed0d/resource/8ad67029-cf2e-49ae-a4b6-20d31611ab6e/download/reseau-express-velo.geojson"
# NOTE(review): parcs_url is not used by any cell visible in this notebook.
parcs_url="https://donnees.montreal.ca/dataset/2e9e4d2f-173a-4c3d-a5e3-565d79baa27d/resource/35796624-15df-4503-a569-797665f8768e/download/espace_vert.json"
piste_cyclable_url="https://donnees.montreal.ca/dataset/5ea29f40-1b5b-4f34-85b3-7c67088ff536/resource/0dc6612a-be66-406b-b2d9-59c9e1c65ebf/download/reseau_cyclable.geojson"
fountaines_url="https://donnees.montreal.ca/dataset/3ff400f3-63cd-446d-8405-842383377fb8/resource/26659739-540d-4fe2-8107-5f35ab7e807c/download/fontaine_eau_potable_v2018.csv"
# NOTE(review): this URL uses the data.montreal.ca host while all others use donnees.montreal.ca; confirm it is intentional.
rvertes_url="https://data.montreal.ca/dataset/ab3ce7bb-09a7-49d7-8f76-461ed4c39937/resource/15883136-0180-4061-9860-d7ce3d46c73c/download/ruelles-vertes.geojson"
mobilier_urbain_url="https://donnees.montreal.ca/dataset/fb04fa09-fda1-44df-b575-1d14b2508372/resource/65766e31-f186-4ac9-9595-bfcf47ae9158/download/mobilierurbaingp.geojson"
terrain_externe_url="https://donnees.montreal.ca/dataset/60850740-dd83-47ee-9a19-13d674e90314/resource/2dac229f-6089-4cb7-ab0b-eadc6a147d5d/download/terrain_sport_ext.json"

In [137]:
# Read the fountain CSV, then turn its Longitude/Latitude columns into WGS 84 point geometries.
fountaines_df = process_fountains_data(fountaines_url)
fountaines_gdf = create_fountains_geodataframe(fountaines_df)

In [138]:
# The fountain-to-fountain distance is computed with geopy from (latitude, longitude)
# pairs, so it must run while the frame is still in lon/lat. This cell ends by
# rebinding `fountaines_gdf` to the reprojected frame; fail fast with a clear message
# if it is re-run on that projected frame instead of erroring deep inside `apply`.
assert fountaines_gdf.crs.is_geographic, (
    "fountaines_gdf is already projected; re-run the fountain loading cell first"
)
fountaines_gdf = get_nearest_fountains(fountaines_gdf)
fountaines_gdf["nearest_fountain"] = fountaines_gdf.apply(calculate_nearest_fountains_distance, axis=1)
print(fountaines_gdf.crs)
fountaines_gdf = prepare_data(fountaines_gdf)
print(fountaines_gdf.crs)

epsg:4326
EPSG:32188


In [139]:
# Green alleys ("ruelles vertes"): download the JSON, build the GeoDataFrame, then reproject.
http_response = process_request(rvertes_url)
rvertes_gdf = json_items_to_geodataframe(http_response)
print(rvertes_gdf.crs)  # CRS assigned at load time
rvertes_gdf = prepare_data(rvertes_gdf)
print(rvertes_gdf.crs)  # CRS after prepare_data's reprojection

epsg:4326
EPSG:32188


In [140]:
# Outdoor sports grounds (terrain_sport_ext): download the JSON, build the GeoDataFrame, then reproject.
http_response = process_request(terrain_externe_url)
terrain_externe_gdf = json_items_to_geodataframe(http_response)
print(terrain_externe_gdf.crs)  # CRS assigned at load time
terrain_externe_gdf = prepare_data(terrain_externe_gdf)
print(terrain_externe_gdf.crs)  # CRS after prepare_data's reprojection

epsg:4326
EPSG:32188


In [141]:
# Express bike network (reseau-express-velo): download the JSON, build the GeoDataFrame, then reproject.
http_response = process_request(rev_url)
rev_gdf = json_items_to_geodataframe(http_response)
print(rev_gdf.crs)  # CRS assigned at load time
rev_gdf = prepare_data(rev_gdf)
print(rev_gdf.crs)  # CRS after prepare_data's reprojection

epsg:4326
EPSG:32188


In [142]:
# Cycling network (reseau_cyclable): download the JSON, build the GeoDataFrame, then reproject.
http_response = process_request(piste_cyclable_url)
piste_cyclable_gdf = json_items_to_geodataframe(http_response)
print(piste_cyclable_gdf.crs)  # CRS assigned at load time
piste_cyclable_gdf = prepare_data(piste_cyclable_gdf)
print(piste_cyclable_gdf.crs)  # CRS after prepare_data's reprojection

epsg:4326
EPSG:32188


In [143]:
# Street furniture ("mobilier urbain"): download the JSON, build the GeoDataFrame, then reproject.
http_response = process_request(mobilier_urbain_url)
mobilier_urbain_gdf = json_items_to_geodataframe(http_response)
print(mobilier_urbain_gdf.crs)  # CRS assigned at load time
mobilier_urbain_gdf = prepare_data(mobilier_urbain_gdf)
print(mobilier_urbain_gdf.crs)  # CRS after prepare_data's reprojection

epsg:4326
EPSG:32188


In [144]:
# Row count of every loaded layer, one line per layer.
print(
    f"rev: {len(rev_gdf)}",
    f"fountaines: {len(fountaines_gdf)}",
    f"ruelles vertes: {len(rvertes_gdf)}",
    f"piste cyclable: {len(piste_cyclable_gdf)}",
    f"mobilier urbains: {len(mobilier_urbain_gdf)}",
    f"terrain externes: {len(terrain_externe_gdf)}",
    sep="\n",
)

rev: 87
fountaines: 848
ruelles vertes: 1254
piste cyclable: 8907
mobilier urbains: 6797
terrain externes: 3476


In [145]:
# CRS of every layer, to confirm they all share the same projection before joining.
print(
    f"rev: {rev_gdf.crs}",
    f"fountaines: {fountaines_gdf.crs}",
    f"ruelles vertes: {rvertes_gdf.crs}",
    f"piste cyclable: {piste_cyclable_gdf.crs}",
    f"mobilier urbains: {mobilier_urbain_gdf.crs}",
    f"terrain externes: {terrain_externe_gdf.crs}",
    sep="\n",
)

rev: EPSG:32188
fountaines: EPSG:32188
ruelles vertes: EPSG:32188
piste cyclable: EPSG:32188
mobilier urbains: EPSG:32188
terrain externes: EPSG:32188


In [146]:
# Give the identifier column of these three layers the common name "objectid".
rev_gdf = rev_gdf.rename({"id": "objectid"}, axis="columns")
rvertes_gdf = rvertes_gdf.rename({"id_trc": "objectid"}, axis="columns")
piste_cyclable_gdf = piste_cyclable_gdf.rename({"id_cycl": "objectid"}, axis="columns")

In [147]:
# Keep only identifier and geometry for each layer; DataFrame.filter skips any
# listed column that a layer does not have.
columns = ["objectid", "geometry"]
rev_filtred_gdf = rev_gdf.filter(items=columns)
rvertes_filtred_gdf = rvertes_gdf.filter(items=columns)
piste_cyclable_filtred_gdf = piste_cyclable_gdf.filter(items=columns)
mobilier_urbain_filtred_gdf = mobilier_urbain_gdf.filter(items=columns)
terrain_externe_filtred_gdf = terrain_externe_gdf.filter(items=columns)

In [148]:
# Fountains keep their own id, their geometry and the fountain-to-fountain distance.
columns = ["id", "geometry", "nearest_fountain"]
fountaines_filtred_gdf = fountaines_gdf.filter(items=columns)

In [149]:
# For every fountain, find the nearest green alley and store the distance in "nearest_rvertes".
fountains_nearest_rvertes = fountaines_filtred_gdf.sjoin_nearest(
    rvertes_filtred_gdf,
    how="left",
    distance_col="nearest_rvertes",
)

In [150]:
# For every fountain, find the nearest express-bike-network feature and store the distance in "nearest_rev".
fountains_nearest_rev = fountaines_filtred_gdf.sjoin_nearest(
    rev_filtred_gdf,
    how="left",
    distance_col="nearest_rev",
)

In [151]:
# For every fountain, find the nearest cycling-network feature and store the distance in "nearest_pcyclable".
fountains_nearest_piste_cyclable = fountaines_filtred_gdf.sjoin_nearest(
    piste_cyclable_filtred_gdf,
    how="left",
    distance_col="nearest_pcyclable",
)

In [152]:
# For every fountain, find the nearest street-furniture feature and store the distance in "nearest_murbain".
fountains_nearest_mobilier_urbain = fountaines_filtred_gdf.sjoin_nearest(
    mobilier_urbain_filtred_gdf,
    how="left",
    distance_col="nearest_murbain",
)

In [153]:
# For every fountain, find the nearest outdoor sports ground and store the distance in "nearest_iexterne".
fountains_nearest_installation_externe = fountaines_filtred_gdf.sjoin_nearest(
    terrain_externe_filtred_gdf,
    how="left",
    distance_col="nearest_iexterne",
)

In [154]:
# Smallest fountain-to-layer distance per layer. Series.min() is used instead of the
# builtin min(): it is vectorized and skips NaN, whereas builtin min() iterates in
# Python and gives order-dependent results when NaN is present.
print(f"min nearest rev: {fountains_nearest_rev['nearest_rev'].min()}")
print(f"min nearest ruelles vertes: {fountains_nearest_rvertes['nearest_rvertes'].min()}")
print(f"min nearest piste cyclable: {fountains_nearest_piste_cyclable['nearest_pcyclable'].min()}")
print(f"min nearest mobilier urbain: {fountains_nearest_mobilier_urbain['nearest_murbain'].min()}")
print(f"min nearest installation externe: {fountains_nearest_installation_externe['nearest_iexterne'].min()}")

min nearest rev: 10.765171370946362
min nearest ruelles vertes: 2.5745363322386083
min nearest piste cyclable: 0.6113412388749414
min nearest mobilier urbain: 0.0003687759228418947
min nearest installation externe: 1.1995347638310314


In [155]:
# Row count of every join result; a count above the number of fountains means
# some fountains matched more than one feature.
print(
    f"nearest rev: {len(fountains_nearest_rev)}",
    f"ruelles vertes: {len(fountains_nearest_rvertes)}",
    f"piste cyclable: {len(fountains_nearest_piste_cyclable)}",
    f"mobilier urbain: {len(fountains_nearest_mobilier_urbain)}",
    f"installation externe: {len(fountains_nearest_installation_externe)}",
    sep="\n",
)

nearest rev: 865
ruelles vertes: 854
piste cyclable: 921
mobilier urbain: 848
installation externe: 848


In [156]:
# Reference row count of the fountain frame, to compare against the join result sizes.
len(fountaines_filtred_gdf)

848

In [157]:
# sjoin_nearest emits one row per equidistant match, so a fountain can appear several
# times. Keep a single row per fountain id in EVERY joined frame: the column-wise
# concat further down aligns on the index and needs it to be unique in all five, not
# only in the three frames that happen to contain ties with the current data.
fountains_nearest_rev = fountains_nearest_rev.drop_duplicates(subset='id', keep="first")
fountains_nearest_rvertes = fountains_nearest_rvertes.drop_duplicates(subset='id', keep="first")
fountains_nearest_piste_cyclable = fountains_nearest_piste_cyclable.drop_duplicates(subset='id', keep="first")
fountains_nearest_mobilier_urbain = fountains_nearest_mobilier_urbain.drop_duplicates(subset='id', keep="first")
fountains_nearest_installation_externe = fountains_nearest_installation_externe.drop_duplicates(subset='id', keep="first")

In [158]:
# Order every join result by fountain id.
fountains_nearest_rev = fountains_nearest_rev.sort_values("id")
fountains_nearest_rvertes = fountains_nearest_rvertes.sort_values("id")
fountains_nearest_piste_cyclable = fountains_nearest_piste_cyclable.sort_values("id")
fountains_nearest_mobilier_urbain = fountains_nearest_mobilier_urbain.sort_values("id")
fountains_nearest_installation_externe = fountains_nearest_installation_externe.sort_values("id")

In [159]:
# Pick the columns that go into the combined table. `rev` also carries the fountain id
# and the fountain-to-fountain distance; the other frames contribute only their own
# distance column.
rev = fountains_nearest_rev[["id", "nearest_rev", "nearest_fountain"]]
rvertes = fountains_nearest_rvertes[["nearest_rvertes"]]
pcyclable = fountains_nearest_piste_cyclable[["nearest_pcyclable"]]
murbain = fountains_nearest_mobilier_urbain[["nearest_murbain"]]
iexterne = fountains_nearest_installation_externe[["nearest_iexterne"]]

In [160]:
# Place the per-layer distance columns side by side; pandas matches rows on the index.
dfs = [rev, rvertes, pcyclable, murbain, iexterne]
result = pd.concat(dfs, axis="columns")

In [161]:
# Preview the first rows of the combined distance table.
result.head()

Unnamed: 0,id,nearest_rev,nearest_fountain,nearest_rvertes,nearest_pcyclable,nearest_murbain,nearest_iexterne
0,1,2697.569104,48.90797,2512.295088,176.113692,2529.702004,21.446787
1,2,2745.451551,1.408827,2541.866572,167.3304,2495.967421,33.065103
2,3,2744.043719,1.408827,2540.756133,167.190902,2497.152536,32.6522
3,4,2582.406191,141.348494,2371.053545,84.592035,2670.52993,18.837126
4,5,2864.11922,126.598086,2456.527385,79.330947,2419.307259,29.983704


In [162]:
# Index a copy of the distance table by fountain id, so every remaining column is a distance.
df = result.copy().set_index('id')

# Per fountain: the name of the column holding the smallest distance, and that distance.
min_col = df.idxmin(axis="columns")
min_val = df.min(axis="columns")

# Gather id, winning column and winning value, then replace the id-based index with a
# plain positional one.
min_df = pd.DataFrame(
    {'id': df.index, 'min_col': min_col, 'min_val': min_val}
).reset_index(drop=True)

In [163]:
# Preview the per-fountain minimum: which column wins and with what distance.
min_df.head()

Unnamed: 0,id,min_col,min_val
0,1,nearest_iexterne,21.446787
1,2,nearest_fountain,1.408827
2,3,nearest_fountain,1.408827
3,4,nearest_iexterne,18.837126
4,5,nearest_iexterne,29.983704


In [164]:
# Attach the per-fountain minimum to the distance table. "id" is the only column the
# two frames share; naming it explicitly keeps the join key from silently changing if
# another common column is ever added, and validate= raises if an id is duplicated on
# either side instead of multiplying rows.
inner_merged = pd.merge(result, min_df, on="id", validate="one_to_one")

In [165]:
# Preview the distance table together with its per-fountain minimum columns.
inner_merged.head()

Unnamed: 0,id,nearest_rev,nearest_fountain,nearest_rvertes,nearest_pcyclable,nearest_murbain,nearest_iexterne,min_col,min_val
0,1,2697.569104,48.90797,2512.295088,176.113692,2529.702004,21.446787,nearest_iexterne,21.446787
1,2,2745.451551,1.408827,2541.866572,167.3304,2495.967421,33.065103,nearest_fountain,1.408827
2,3,2744.043719,1.408827,2540.756133,167.190902,2497.152536,32.6522,nearest_fountain,1.408827
3,4,2582.406191,141.348494,2371.053545,84.592035,2670.52993,18.837126,nearest_iexterne,18.837126
4,5,2864.11922,126.598086,2456.527385,79.330947,2419.307259,29.983704,nearest_iexterne,29.983704


In [166]:
# Fountains whose smallest distance to anything exceeds 100 (presumably meters; confirm the units of the projected layers).
inner_merged.loc[inner_merged["min_val"] > 100]

Unnamed: 0,id,nearest_rev,nearest_fountain,nearest_rvertes,nearest_pcyclable,nearest_murbain,nearest_iexterne,min_col,min_val
43,44,232.048656,480.537749,264.720491,137.16688,1421.938007,508.442368,nearest_pcyclable,137.16688
69,70,3029.009155,346.348627,218.636881,279.063847,765.416977,117.362236,nearest_iexterne,117.362236
112,113,2556.871326,652.667612,2511.545512,338.504487,4334.501341,361.437028,nearest_pcyclable,338.504487
166,168,3684.803556,372.432475,254.145471,103.919023,1507.796111,165.876529,nearest_pcyclable,103.919023
167,169,3819.581629,347.557494,368.680756,173.596096,1896.471712,245.761976,nearest_pcyclable,173.596096
188,190,7281.502194,412.940179,282.098548,112.314614,1325.641057,231.402535,nearest_pcyclable,112.314614
256,267,7232.954794,324.692805,7207.789215,148.064191,945.101555,198.714873,nearest_pcyclable,148.064191
285,296,4740.187547,193.474005,4664.339913,219.590718,2768.907649,163.489621,nearest_iexterne,163.489621
286,297,4229.873149,319.620316,4133.170022,214.172889,3432.137141,315.880688,nearest_pcyclable,214.172889
345,356,1771.934152,321.236886,174.01248,137.028048,1436.50164,271.728553,nearest_pcyclable,137.028048
