In [1]:
import os
import folium
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Polygon

from utils import *

In [2]:
# Download locations on donnees.montreal.ca: the "espace_vert" JSON file and
# the "fontaine_eau_potable" CSV file.
parcs_url = "https://donnees.montreal.ca/dataset/2e9e4d2f-173a-4c3d-a5e3-565d79baa27d/resource/35796624-15df-4503-a569-797665f8768e/download/espace_vert.json"
fountaines_url = "https://donnees.montreal.ca/dataset/3ff400f3-63cd-446d-8405-842383377fb8/resource/26659739-540d-4fe2-8107-5f35ab7e807c/download/fontaine_eau_potable_v2018.csv"


In [3]:
# Numeric EPSG codes; the helpers below interpolate them into "epsg:<code>" strings.
WGS84 = 4326
MTM8 = 32188  # NOTE(review): not referenced by any cell visible here - confirm it is still needed

def process_request(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Passed
            unchanged to ``requests.get`` (which also accepts a
            ``(connect, read)`` tuple). Defaults to 30.

    Returns:
        The payload as parsed by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # requests applies no timeout by default, so a stalled connection would
    # otherwise block the notebook forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def json_items_to_geodataframe(http_response):
    """Build a GeoDataFrame from feature data and label it with the WGS84 CRS.

    Args:
        http_response: Object handed to ``GeoDataFrame.from_features``
            (the notebook passes the decoded JSON payload).

    Returns:
        The frame returned by ``from_features`` after ``set_crs`` has been
        called with ``f"epsg:{WGS84}"``.
    """
    features_gdf = gpd.GeoDataFrame.from_features(http_response)
    return features_gdf.set_crs(f"epsg:{WGS84}")



def process_fountains_data(url):
    """Load the CSV found at ``url`` into a pandas DataFrame.

    Args:
        url: Anything ``pandas.read_csv`` accepts as its first argument
            (the notebook passes a download URL).

    Returns:
        The DataFrame produced by ``pd.read_csv(url)``, unmodified.
    """
    return pd.read_csv(url)


def create_fountains_geodataframe(df):
    """Turn a table with ``Longitude``/``Latitude`` columns into a point GeoDataFrame.

    Args:
        df: Table exposing ``Longitude`` and ``Latitude`` columns; they are
            used as the x and y coordinates respectively.

    Returns:
        A GeoDataFrame wrapping ``df`` whose geometry comes from
        ``gpd.points_from_xy``, after ``set_crs`` with ``f"epsg:{WGS84}"``.
    """
    points = gpd.points_from_xy(df.Longitude, df.Latitude)
    fountains = gpd.GeoDataFrame(df, geometry=points)
    return fountains.set_crs(f"epsg:{WGS84}")

In [4]:
# Read the fountain CSV, then turn its coordinate columns into point geometry.
fountaines_df = process_fountains_data(url=fountaines_url)
fountaines_gdf = create_fountains_geodataframe(df=fountaines_df)

In [5]:
# Fetch the parcs JSON payload and convert its features into a GeoDataFrame.
http_response = process_request(url=parcs_url)
parcs_gdf = json_items_to_geodataframe(http_response=http_response)

In [6]:
# Both layers must report the same CRS before they are spatially joined.
print(fountaines_gdf.crs, parcs_gdf.crs, sep="\n")

epsg:4326
epsg:4326


In [7]:
# Lower-case the column labels of both frames in place, so later cells can
# address them with one consistent spelling (e.g. "longitude", "objectid").
parcs_gdf.columns = parcs_gdf.columns.str.lower()
fountaines_gdf.columns = fountaines_gdf.columns.str.lower()

In [8]:
# Trim each layer to the fields used downstream. DataFrame.filter keeps only
# labels that exist, so a label missing from the frame is skipped silently.

# Fountains: identifier, coordinates and geometry.
columns = ["id", "longitude", "latitude", "geometry"]
fountaines_gdf = fountaines_gdf.filter(items=columns)

# Parcs: identifier, "superficie" and geometry.
columns = ["objectid", "superficie", "geometry"]
parcs_gdf = parcs_gdf.filter(items=columns)

In [9]:
# Attach to every fountain the parc whose geometry contains it, discard the
# join's "index_right" bookkeeping column, and pin the column dtypes.
fountaines_in_parcs = (
    fountaines_gdf
    .sjoin(parcs_gdf, predicate="within")
    .drop(columns="index_right")
    .astype(
        {
            "id": "int64",
            "objectid": "int64",
            "longitude": "float64",
            "latitude": "float64",
            "superficie": "float64",
        }
    )
)

In [10]:
# Per-parc (objectid) count of non-null values in every remaining column.
grouped = fountaines_in_parcs.groupby(by="objectid").count()
grouped

Unnamed: 0_level_0,id,longitude,latitude,geometry,superficie
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3479,1,1,1,1,1
3482,4,4,4,4,4
3485,2,2,2,2,2
3494,2,2,2,2,2
3523,1,1,1,1,1
...,...,...,...,...,...
5659,2,2,2,2,2
5662,3,3,3,3,3
5679,2,2,2,2,2
5685,5,5,5,5,5


In [11]:
# Look up each row's objectid in the per-parc "id" counts and store the result
# in a new "count" column of the joined frame.
fountaines_in_parcs["count"] = fountaines_in_parcs["objectid"].map(grouped["id"])

In [12]:
# Display the joined table, which now carries the per-parc "count" column.
fountaines_in_parcs

Unnamed: 0,id,longitude,latitude,geometry,objectid,superficie,count
0,1,-73.589462,45.592012,POINT (-73.58946 45.59201),5069,11.804620,5
1,2,-73.590055,45.592189,POINT (-73.59006 45.59219),5069,11.804620,5
2,3,-73.590041,45.592181,POINT (-73.59004 45.59218),5069,11.804620,5
3,4,-73.589008,45.590781,POINT (-73.58901 45.59078),5069,11.804620,5
4,5,-73.591582,45.592574,POINT (-73.59158 45.59257),5069,11.804620,5
...,...,...,...,...,...,...,...
840,854,-73.597158,45.497578,POINT (-73.59716 45.49758),5629,188.360395,18
841,855,-73.583063,45.505103,POINT (-73.58306 45.50510),5629,188.360395,18
842,856,-73.585721,45.514257,POINT (-73.58572 45.51426),5629,188.360395,18
843,857,-73.585019,45.513926,POINT (-73.58502 45.51393),5629,188.360395,18


In [13]:
# Export the table without its geometry column and without the index.
(
    fountaines_in_parcs
    .drop(columns="geometry")
    .to_csv("fountains-in-parcs-counts.csv", index=False)
)

In [14]:
# Toy illustration of the "count rows per key, then map the count back" pattern.
df = pd.DataFrame(
    {
        "id": [1, 1, 2, 2, 2, 3],
        "col1": ["a", "b", "c", "d", "e", "f"],
        "col2": [10, 20, 30, 40, 50, 60],
    }
)

# Non-null count of each column, per id.
grouped = df.groupby("id").count()

# Translate every row's id into its group size and keep it as a new column.
df["count"] = df["id"].map(grouped["col1"])

# Show the example frame with the added column.
print(df)


   id col1  col2  count
0   1    a    10      2
1   1    b    20      2
2   2    c    30      3
3   2    d    40      3
4   2    e    50      3
5   3    f    60      1


In [15]:
# Display the per-id counts computed for the toy example.
grouped

Unnamed: 0_level_0,col1,col2
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2,2
2,3,3
3,1,1
