# How much time buses spend at stops

## Imports

In [None]:
import warnings
from datetime import datetime
from pathlib import Path

import geopandas as gpd
import gtfs_kit as gk
import pandas as pd
from google.transit.gtfs_realtime_pb2 import FeedMessage

## General settings

In [None]:
# pb2_path = Path("../data")

# GTFS schedule archive, kept as a plain string path.
sched_path = "../data/itm_south_east_gtfs.zip"
# Directory holding the parquet files that are loaded into `df`.
parquet_path = Path("../parquet")

In [None]:
# Read every entry of the parquet directory, in sorted path order, and stack
# the resulting frames on top of each other.
parquet_files = sorted(parquet_path.iterdir())
df = pd.concat([gpd.read_parquet(pq_file) for pq_file in parquet_files])
df.head()

In [None]:
# Routes under study. The values are the route_id strings matched against the
# data below.
# NOTE(review): the keys look like public route numbers — confirm.
rids = {"1": "3815", "10": "4824", "5": "14187", "5A": "50065"}

# Keep only the rows that belong to one of those routes.
df = df[df["route_id"].isin(rids.values())]

## Get bus stop location data

In [None]:
# Load the schedule feed; any warning raised while reading it is silenced, and
# the previous warning filters are restored when the `with` block exits.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    fd = gk.read_feed(sched_path, dist_units="mi")

In [None]:
# Schedule rows for the trips that actually show up in the observed data.
observed_trip_ids = df.trip_id.unique()
tid_trips = fd.trips[fd.trips["trip_id"].isin(observed_trip_ids)]
tid_trips.head(2)  # not used but has trip direction!

In [None]:
# Scheduled stop_times restricted to the trips present in the observed data.
on_observed_trip = fd.stop_times["trip_id"].isin(df["trip_id"])
rel_stops = fd.stop_times[on_observed_trip]
rel_stops.head(3)

In [None]:
# Stops that appear in the stop_times of the observed trips.
served_stop_ids = rel_stops.stop_id.unique()
served_stops = fd.stops.loc[fd.stops.stop_id.isin(served_stop_ids)]

# Swap the lon/lat columns for a point geometry column (WGS84, EPSG:4326).
stop_points = gpd.points_from_xy(x=served_stops.stop_lon, y=served_stops.stop_lat)
stops = gpd.GeoDataFrame(
    served_stops.drop(["stop_lon", "stop_lat"], axis=1).assign(geometry=stop_points),
    crs=4326,
)

# Turn every stop point into a disc: buffer by 30 units in EPSG:3857, then
# project the result back to EPSG:4326.
# NOTE(review): EPSG:3857 units are only nominally metres; the true radius on
# the ground presumably shrinks with latitude — confirm 30 is the intended size.
buffered = stops.geometry.to_crs(epsg=3857).buffer(30)
stops.geometry = buffered.to_crs(epsg=4326)
stops.head()

## And compare them!

In [None]:
# df_map = pd.concat([df[["geometry"]].assign(tp=0), stops[["geometry"]].assign(tp=1)])
# df_map.explore(tiles="CartoDB positron", cmap="viridis", column="tp")

In [None]:
def filter_relevant_stops(row):
    """Tell whether the row's stop is listed for the row's trip in ``rel_stops``.

    Args:
        row: Object exposing ``trip_id`` and ``stop_id`` attributes (used with
            ``DataFrame.apply(..., axis=1)`` in this notebook).

    Returns:
        ``True`` when ``row.stop_id`` is among the ``stop_id`` values that the
        module-level ``rel_stops`` frame holds for ``row.trip_id``.
    """
    trip_rows = rel_stops[rel_stops["trip_id"] == row.trip_id]
    scheduled_stop_ids = trip_rows["stop_id"].unique()
    return row.stop_id in scheduled_stop_ids

In [None]:
# Attach to each observation the stop buffer(s) it falls within, dropping the
# observations that fall inside no buffer at all.
inside_buffer = df.sjoin(stops, how="left", predicate="within").dropna(
    subset="index_right"
)

# Flag the matches whose stop is actually listed for the observation's trip,
# and keep only those.
flagged = inside_buffer.assign(
    keep=inside_buffer.apply(filter_relevant_stops, axis=1)
)
jnd = flagged.loc[flagged.keep]

# A trip must not have two retained rows sharing the same timestamp.
assert not jnd.duplicated(["trip_id", "timestamp"]).any()

# Running 0-based counter of rows within each (trip, current_stop, stop) group.
visit_counter = jnd.groupby(["trip_id", "current_stop", "stop_id"]).cumcount()
jnd = jnd.assign(cum_stop=visit_counter)
jnd.head()

In [None]:
# Columns shown when inspecting the result.
cols = [
    "current_stop",
    "current_status",
    "timestamp",
    "stop_id",
    "stop_name",
    "cum_stop",
]

# Stops for which at least one (trip, current_stop, stop) group has more than
# one row, i.e. where the counter reached 1 or above.
repeated_stop_ids = jnd.loc[jnd.cum_stop >= 1, "stop_id"].unique()
long_stops = jnd[jnd.stop_id.isin(repeated_stop_ids)].sort_values(
    by=["trip_id", "current_stop"]
)
long_stops[cols]

In [None]:
def diff_calc(x):
    """Return the span covered by the values of *x* (largest minus smallest).

    Used as a groupby aggregation over timestamps, where the result is the
    time elapsed between the earliest and the latest observation of a group.

    The span is computed from ``max`` and ``min`` rather than from the last and
    first positions, so the result does not depend on the rows being in
    chronological order and can never be negative.

    Args:
        x: A pandas Series of mutually subtractable values (numbers or
            timestamps).

    Returns:
        ``x.max() - x.min()``; zero (or a zero timedelta) for a single value.
    """
    return x.max() - x.min()

In [None]:
# For every (route, trip, stop) combination, aggregate the timestamps of its
# rows with diff_calc, cast the result to "timedelta64[s]", and show the
# largest values first.
per_stop_span = jnd.groupby(["route_id", "trip_id", "stop_id", "stop_name"])[
    "timestamp"
].agg(diff_calc)
stop_time = per_stop_span.astype("timedelta64[s]").to_frame()
stop_time.sort_values(by="timestamp", ascending=False)