# Travel Time Analysis

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
from pathlib import Path

# add the
ROOT = Path(os.getcwd())
while not (ROOT / ".git").exists():
    ROOT = ROOT.parent

sys.path.append(str(ROOT))

from src.config import CargoBikeConfig, load_config
from src.osm_tags import build_tag_filter


import polars as pl
import geopolars as gpl
import geopandas as gpd
import pandas as pd

## Load Config


In [3]:
config = load_config(ROOT / "config" / "paper.yaml")

In [4]:
h3_df = (
    pd.concat([gpd.read_parquet(city.h3_file).assign(city=city.name) for city in config.Cities], axis=0)
    .query("is_city")
)

h3_df.head()

Unnamed: 0_level_0,geometry,is_city,city
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
892a339a5afffff,"POLYGON ((-71.13572 42.23376, -71.13794 42.232...",True,"Boston, USA"
892a3066a3bffff,"POLYGON ((-71.08114 42.30902, -71.08337 42.308...",True,"Boston, USA"
892a302a567ffff,"POLYGON ((-70.82381 42.36269, -70.82604 42.361...",True,"Boston, USA"
892a3066e17ffff,"POLYGON ((-71.06072 42.33323, -71.06295 42.332...",True,"Boston, USA"
892a3066b3bffff,"POLYGON ((-71.06614 42.29023, -71.06837 42.289...",True,"Boston, USA"


## Load the Amazon Data

In [5]:
## Load the Service Time Data 
service_time_city = CargoBikeConfig.get_service_time_city(config, 'amazon')
# this does two things, one adds the city label and 2, it crops to the city limits

service_time_df = pl.read_parquet(service_time_city.file).with_columns(
    pl.col(service_time_city.service_time_col).alias('service_time'),
)


service_time_df.with_columns(
    
)

service_time_df = service_time_df.join(
    pl.DataFrame(h3_df.reset_index()[['region_id', 'is_city',]]), 
    left_on='h3', 
    right_on='region_id',
    how='left'
)

### Identify Sequential Deliveries where Both in City Limits

In [6]:
sequential_df = service_time_df.with_columns(
    pl.col('is_city').sort_by(pl.col('order')).shift(1).over('route_id').alias('is_city_prev'),
).filter(
    pl.col('is_city_prev') & pl.col('is_city')
)

sequential_df.head()

stop_id,route_id,station_code,executor_capacity_cm3,lat,lng,type,dataset,h3,package_num,has_time_window,planned_service_time,width,depth,height,volume,status,order,travel_time,arrival_datetime,time_of_day,city,service_time,is_city,is_city_prev
str,str,str,f64,f64,f64,str,str,str,u32,bool,f64,f64,f64,f64,f64,bool,i64,f32,"datetime[μs, EST]",u32,str,f64,bool,bool
"""VO""","""RouteID_de15d6…","""DBO2""",4672279.5,42.332404,-71.028593,"""Dropoff""","""almrrc2021-dat…","""892a3066c17fff…",2,False,190.0,36.55,50.5,20.3,93476.832,True,2,132.0,2018-06-17 09:33:04.699951 EST,34384,"""Boston, USA""",190.0,True,True
"""YR""","""RouteID_de15d6…","""DBO2""",4672279.5,42.332391,-71.029564,"""Dropoff""","""almrrc2021-dat…","""892a3066c17fff…",2,False,131.0,29.25,34.95,17.15,36364.866,True,3,16.0,2018-06-17 09:35:31.699951 EST,34531,"""Boston, USA""",131.0,True,True
"""DT""","""RouteID_de15d6…","""DBO2""",4672279.5,42.332368,-71.030761,"""Dropoff""","""almrrc2021-dat…","""892a3066c17fff…",3,False,108.9,22.6,33.633333,8.3,23890.056,True,4,19.700001,2018-06-17 09:37:40.299951 EST,34660,"""Boston, USA""",108.9,True,True
"""DQ""","""RouteID_de15d6…","""DBO2""",4672279.5,42.332337,-71.032741,"""Dropoff""","""almrrc2021-dat…","""892a3066c03fff…",1,False,49.0,25.1,27.7,5.3,3684.931,True,5,33.900002,2018-06-17 09:39:03.199953 EST,34743,"""Boston, USA""",49.0,True,True
"""QT""","""RouteID_de15d6…","""DBO2""",4672279.5,42.33232,-71.033697,"""Dropoff""","""almrrc2021-dat…","""892a3066c03fff…",2,False,42.0,17.55,27.45,5.7,6354.075,True,6,18.1,2018-06-17 09:40:03.299953 EST,34803,"""Boston, USA""",42.0,True,True


In [7]:
print(sequential_df.group_by(['city']).agg(
    pl.count(),
    pl.mean('travel_time'),
    pl.std('travel_time').alias('std_travel_time'),
    pl.col('travel_time').quantile(0.1).alias('10th_travel_time'),
    pl.col('travel_time').quantile(0.5).alias('median_travel_time'),
    pl.col('travel_time').quantile(0.9).alias('90th_travel_time'),
).sort('city').to_pandas().to_latex(
    index=False,
    float_format="%.1f",
))

\begin{tabular}{lrrrrrr}
\toprule
city & count & travel_time & std_travel_time & 10th_travel_time & median_travel_time & 90th_travel_time \\
\midrule
Austin, USA & 29879 & 62.0 & 67.7 & 11.3 & 43.6 & 129.1 \\
Boston, USA & 21495 & 107.1 & 85.5 & 14.6 & 90.1 & 224.9 \\
Chicago, USA & 40306 & 114.3 & 81.6 & 16.0 & 110.7 & 218.7 \\
Seattle, USA & 56441 & 55.2 & 50.7 & 12.0 & 41.7 & 112.9 \\
\bottomrule
\end{tabular}



In [8]:
print(sequential_df.select(
    pl.count(),
    pl.mean('travel_time'),
    pl.std('travel_time').alias('std_travel_time'),
    pl.col('travel_time').quantile(0.1).alias('10th_travel_time'),
    pl.col('travel_time').quantile(0.5).alias('median_travel_time'),
    pl.col('travel_time').quantile(0.9).alias('90th_travel_time'),
).to_pandas().to_latex(
    index=False,
    float_format="%.1f",
))

\begin{tabular}{rrrrrr}
\toprule
count & travel_time & std_travel_time & 10th_travel_time & median_travel_time & 90th_travel_time \\
\midrule
148121 & 80.2 & 74.2 & 12.8 & 55.5 & 181.9 \\
\bottomrule
\end{tabular}



In [11]:
sequential_df[['travel_time']].write_parquet(
    "~/Downloads/amazon_travel_time.parquet"
)