In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Brussels Cargo Bike Analysis


In [2]:
import os
import sys
from pathlib import Path

# Find the repository root -- the nearest directory, starting at the current
# working directory and walking upwards, that contains a ".git" entry -- and
# put it on sys.path so the project-local `src` package can be imported below.
_start_dir = Path(os.getcwd())
ROOT = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / ".git").exists()
    ),
    None,
)
if ROOT is None:
    # The filesystem root is its own parent, so an unguarded upward walk would
    # spin forever when the notebook is started outside a git checkout.
    raise FileNotFoundError(
        f"No '.git' entry found in {_start_dir} or any of its parents; "
        "start the notebook from inside the repository."
    )

# Avoid piling up duplicate entries when this cell is re-run.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

from src.config import CargoBikeConfig, load_config

import polars as pl
import geopandas as gpd
import pandas as pd

## Open the Files

In [3]:
# Load the paper-wide configuration from <repo root>/config/paper.yaml.
config = load_config(ROOT.joinpath("config", "paper.yaml"))

# Pull out the entry that `get_service_time_city` returns for "brussels".
brussel_config = CargoBikeConfig.get_service_time_city(config, "brussels")

### Open the H3 File

In [4]:
# Read the H3 grid of every configured city and tag each row with the name of
# the city it came from.
city_frames = [
    gpd.read_parquet(city.h3_file).assign(city=city.name)
    for city in config.Cities
]

# Stack all cities and keep only the rows flagged by the `is_city` column.
h3_df = pd.concat(city_frames, axis=0).query("is_city")

h3_df.head()

Unnamed: 0_level_0,geometry,is_city,city
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
892a339a5afffff,"POLYGON ((-71.13572 42.23376, -71.13794 42.232...",True,"Boston, USA"
892a3066a3bffff,"POLYGON ((-71.08114 42.30902, -71.08337 42.308...",True,"Boston, USA"
892a302a567ffff,"POLYGON ((-70.82381 42.36269, -70.82604 42.361...",True,"Boston, USA"
892a3066e17ffff,"POLYGON ((-71.06072 42.33323, -71.06295 42.332...",True,"Boston, USA"
892a3066b3bffff,"POLYGON ((-71.06614 42.29023, -71.06837 42.289...",True,"Boston, USA"


In [5]:
# Read the parquet file referenced by `brussel_config.file` into a polars
# DataFrame, then preview its first rows.
service_time_df = pl.read_parquet(brussel_config.file)

service_time_df.head()

total_stop_time,predicted_service_time,predicted_service_time_95th,region_id,sequence,rider,gps_stop_time,gps_start_time,package_number
i64,f64,f64,str,i64,u64,"datetime[ns, UTC]","datetime[ns, UTC]",i64
243,159.289408,210.445637,"""891fa44186ffff…",3,1325826965756745496,2022-10-29 08:33:50 UTC,2022-10-29 08:37:53 UTC,2
185,119.406319,154.256966,"""891fa441b53fff…",4,1325826965756745496,2022-10-29 08:58:41 UTC,2022-10-29 09:01:46 UTC,1
440,290.286565,390.516136,"""891fa441bdbfff…",5,1325826965756745496,2022-10-29 09:04:26 UTC,2022-10-29 09:11:46 UTC,1
197,132.540481,173.542221,"""891fa441b83fff…",2,1325826965756745496,2022-10-29 09:18:46 UTC,2022-10-29 09:22:03 UTC,1
340,218.079184,296.746872,"""891fa44ad0ffff…",2,1325826965756745496,2022-10-31 05:59:34 UTC,2022-10-31 06:05:14 UTC,2


In [7]:
# Keep only the stops whose H3 cell is one of the in-city regions in `h3_df`
# (its index holds the region ids).
city_region_ids = h3_df.index.to_list()
service_time_df = service_time_df.filter(pl.col('region_id').is_in(city_region_ids))

## Summary

In [8]:
# First and last `gps_stop_time` in the filtered data, as ISO-8601 strings.
stop_times = service_time_df['gps_stop_time']
stop_times.min().isoformat(), stop_times.max().isoformat()

('2022-10-29T08:33:50+00:00', '2023-09-30T09:37:55+00:00')

In [9]:
# Number of distinct (rider, calendar date of gps_stop_time) combinations.
rider_days = service_time_df.select(
    pl.col("rider"),
    pl.col("gps_stop_time").dt.date(),
).unique()
rider_days.height

786

In [10]:
# Total number of rows left after the region filter.
service_time_df.height

10847

In [11]:
from srai.plotting import plot_numeric_data
from srai.h3 import h3_to_shapely_geometry
from srai.constants import WGS84_CRS, REGIONS_INDEX
import geopandas as gpd

In [12]:
# Count the rows recorded in each H3 cell. After aggregation the
# `predicted_service_time` column holds the number of non-null predictions
# per cell, not a time.
# NOTE(review): the frame previewed earlier already has a `region_id` column,
# so the 'h3' rename looks like a no-op kept for older inputs -- confirm.
stops_pdf = service_time_df.to_pandas().rename(columns={'h3': REGIONS_INDEX})
grouped_df = stops_pdf.groupby(REGIONS_INDEX).agg(
    {'predicted_service_time': 'count'}
)

# Build a geometry for every cell id in the index and wrap the counts in a
# GeoDataFrame using the WGS84 CRS.
cell_polygons = h3_to_shapely_geometry(grouped_df.index)
gdf = gpd.GeoDataFrame(grouped_df, geometry=cell_polygons, crs=WGS84_CRS)

gdf.head()

Unnamed: 0_level_0,predicted_service_time,geometry
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1
891fa440003ffff,1,"POLYGON ((4.32648 50.89679, 4.32680 50.89518, ..."
891fa440027ffff,1,"POLYGON ((4.34322 50.89668, 4.34355 50.89506, ..."
891fa44003bffff,1,"POLYGON ((4.33393 50.89536, 4.33426 50.89374, ..."
891fa440047ffff,1,"POLYGON ((4.32281 50.89128, 4.32314 50.88966, ..."
891fa44004bffff,2,"POLYGON ((4.31634 50.88785, 4.31667 50.88623, ..."


In [13]:
# Map of the per-cell counts stored in `predicted_service_time`; the map
# object is kept in `m` so later cells can draw on top of it.
m = plot_numeric_data(gdf, data_column='predicted_service_time')
m

In [14]:
import math

# Work in the UTM CRS estimated for the data rather than in WGS84 degrees.
utm_crs = gdf.estimate_utm_crs()

# Merge all cells into one geometry and take its minimum bounding circle.
merged_cells = gdf.to_crs(utm_crs).unary_union
circle = gpd.GeoSeries(merged_cells, crs=utm_crs).minimum_bounding_circle()

# Scale the area down by 10^6 (m^2 -> km^2, assuming the UTM CRS is in metres).
area = circle.area / 1_000_000

# Radius of a circle with that area: r = sqrt(area / pi).
radius = (area / math.pi) ** 0.5

radius

0    8.133241
dtype: float64

In [15]:
# Reproject the bounding circle to WGS84 and draw it on the existing map `m`.
circle.to_crs(WGS84_CRS).explore(m=m)