# How much time buses spend at stops

## Imports

In [1]:
import warnings
from datetime import datetime
from pathlib import Path

import geopandas as gpd
import gtfs_kit as gk
import pandas as pd
from google.transit.gtfs_realtime_pb2 import FeedMessage

## General settings

In [2]:
pb2_path = Path("../data")
sched_path = "../data/itm_south_east_gtfs.zip"

## Load pb2 data

In [3]:
trips = []
for f in sorted(pb2_path.glob("*.pb2")):
    msg = FeedMessage()
    msg.ParseFromString(f.read_bytes())
    for t in msg.entity:
        trips.append(t)

In [4]:
# 1 == STOPPED_AT
# 2 == IN_TRANSIT_TO
rows = [
    {
        "id": t.id,
        "trip_id": t.vehicle.trip.trip_id,
        "route_id": t.vehicle.trip.route_id,
        "start_time": t.vehicle.trip.start_time,
        "start_date": t.vehicle.trip.start_date,
        "latitude": t.vehicle.position.latitude,
        "longitude": t.vehicle.position.longitude,
        "current_stop": t.vehicle.current_stop_sequence,
        "current_status": t.vehicle.current_status,
        "timestamp": datetime.utcfromtimestamp(t.vehicle.timestamp),
        "vehicle": t.vehicle.vehicle.id,
    }
    for t in trips
]
df = pd.DataFrame(rows).drop_duplicates()
df.timestamp = df.timestamp.dt.tz_localize("UTC")
df = df.loc[df.latitude <= 53]

Convert to GeoDataFrame

In [5]:
df = (
    df.assign(geometry=gpd.points_from_xy(x=df.longitude, y=df.latitude))
    .drop(["longitude", "latitude"], axis=1)
    .pipe(gpd.GeoDataFrame, crs=4326)
)
df.head(2)

Unnamed: 0,id,trip_id,route_id,start_time,start_date,current_stop,current_status,timestamp,vehicle,geometry
0,13763630073698663407,VJ9319dc35a520096b9b1e6abe863fd465be3f5e80,50065,16:10:00,20221015,13,2,2022-10-15 16:37:46+00:00,300,POINT (-1.21231 51.72981)
1,15671302897479048672,VJa30a7b0edb53edbd19908a14241d6f9f8d43fd1a,50065,16:25:00,20221015,14,2,2022-10-15 16:37:43+00:00,311,POINT (-1.21449 51.73141)


## Get bus stop location data

In [6]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fd = gk.read_feed(sched_path, dist_units="mi")

In [7]:
tid_trips = fd.trips.loc[fd.trips.trip_id.isin(df.trip_id.unique())]
tid_trips.head(2)  # not used but has trip direction!

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,block_id,shape_id,wheelchair_accessible,trip_direction_name,vehicle_journey_code
20516,50065,210,VJ967a265ecfa77f299469a336b6d896cc97ee755e,Oxford City Centre,,,0,,"VJ_49-5A-O-y10-1-210-T0,VJ_49-5A-O-y10-1-231-T..."
20532,50065,176,VJ843caff8938f113107e2ff80c66afd2f80ff2f9d,Minchery Farm,,,0,,"VJ_49-5A-O-y10-1-178-T0,VJ_49-5A-O-y10-1-256-T2"


In [8]:
rel_stops = fd.stop_times.loc[fd.stop_times.trip_id.isin(df.trip_id)]
rel_stops.head(3)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,stop_direction_name
646237,VJ967a265ecfa77f299469a336b6d896cc97ee755e,19:10:00,19:10:00,340001235OPP,0,,0,1,,1,
646238,VJ967a265ecfa77f299469a336b6d896cc97ee755e,19:11:00,19:11:00,340001236OPP,1,,0,0,,1,
646239,VJ967a265ecfa77f299469a336b6d896cc97ee755e,19:12:00,19:12:00,340001244OPS,2,,0,0,,1,


In [9]:
stops = (
    fd.stops.loc[fd.stops.stop_id.isin(rel_stops.stop_id.unique())]
    .assign(
        geometry=lambda x: gpd.points_from_xy(x=x.stop_lon, y=x.stop_lat)
    )
    .drop(["stop_lon", "stop_lat"], axis=1)
    .pipe(gpd.GeoDataFrame, crs=4326)
)
stops.geometry = stops.to_crs(epsg=3857).buffer(30).to_crs(epsg=4326)
stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,wheelchair_boarding,location_type,parent_station,platform_code,geometry
35759,340001992J2,oxfgjpaw,Queens Lane (Stop J2),0,,,,"POLYGON ((-1.25109 51.75264, -1.25109 51.75263..."
35760,340001992J3,oxfgjmjt,Queens Lane (Stop J3),0,,,,"POLYGON ((-1.25126 51.75265, -1.25126 51.75264..."
35761,340001992K1,oxfgjmtm,Queens Lane (Stop K1),0,,,,"POLYGON ((-1.25126 51.75279, -1.25126 51.75277..."
35766,340001989S1,oxfgjgtm,Speedwell Street (Stop S1),0,,,,"POLYGON ((-1.25730 51.74842, -1.25730 51.74840..."
35767,340001989S2,oxfgjmap,Speedwell Street (Stop S2),0,,,,"POLYGON ((-1.25774 51.74848, -1.25774 51.74846..."


## And compare them!

In [10]:
df_map = pd.concat([df[["geometry"]].assign(tp=0), stops[["geometry"]].assign(tp=1)])
# df_map.explore(tiles="CartoDB positron", cmap="viridis", column="tp")

In [11]:
def filter_relevant_stops(row):
    return (
        row.stop_id in rel_stops.loc[rel_stops.trip_id == row.trip_id].stop_id.unique()
    )

In [12]:
jnd = (
    df.sjoin(stops, how="left", predicate="within")
    .dropna(subset="index_right")
    .assign(keep=lambda x: x.apply(filter_relevant_stops, axis=1))
    .loc[lambda x: x.keep]
)
assert len(jnd.loc[jnd.duplicated(["trip_id", "timestamp"])]) == 0
jnd = jnd.assign(
    cum_stop=jnd.groupby(["trip_id", "current_stop", "stop_id"]).cumcount()
)
jnd.head()

Unnamed: 0,id,trip_id,route_id,start_time,start_date,current_stop,current_status,timestamp,vehicle,geometry,index_right,stop_id,stop_code,stop_name,wheelchair_boarding,location_type,parent_station,platform_code,keep,cum_stop
1,15671302897479048672,VJa30a7b0edb53edbd19908a14241d6f9f8d43fd1a,50065,16:25:00,20221015,14,2,2022-10-15 16:37:43+00:00,311,POINT (-1.21449 51.73141),36283.0,340001255OUT,oxfagwpm,Templars Square (Stop E),0.0,,,,True,0
2,8931724906858233862,VJbf1af309850819f0c13270b1c82a48b16c1448bf,3815,16:34:00,20221015,0,1,2022-10-15 16:37:37+00:00,SCOX-10439,POINT (-1.26963 51.75335),35774.0,340000006R2,oxfamjmj,Railway Station (Stop R2),0.0,,,,True,0
4,2588383118819005104,VJ86c3f449922868daf9a499258fafb398d30fa050,4824,16:00:00,20221015,23,2,2022-10-15 16:37:36+00:00,SCOX-10679,POINT (-1.20176 51.74399),38392.0,340001364BUR,oxfagmjm,Corner House,0.0,,,,True,0
5,13198027651868249401,VJa3cf6467349c7ca904f66eb783c6a2a7ce0efc1c,3815,16:14:00,20221015,11,2,2022-10-15 16:37:29+00:00,SCOX-10786,POINT (-1.22598 51.74218),38231.0,340001197CNR,oxfadmdt,Howard Street east,0.0,,,,True,0
7,11112882734452251887,VJ29c5fe0bd4f76c6ec3fb9943b133ea30701bebf3,4824,15:50:00,20221015,29,2,2022-10-15 16:37:49+00:00,SCOX-12006,POINT (-1.22937 51.74461),38226.0,340001195OUT,oxfadjmp,Magdalen Road east,0.0,,,,True,0


In [13]:
cols = [
    "current_stop",
    "current_status",
    "timestamp",
    "stop_id",
    "stop_name",
    "cum_stop",
]
long_stops = jnd.loc[jnd.stop_id.isin(jnd.loc[jnd.cum_stop >= 1].stop_id.unique())]
long_stops = long_stops.sort_values(by=["trip_id", "current_stop"])
long_stops[cols]

Unnamed: 0,current_stop,current_status,timestamp,stop_id,stop_name,cum_stop
9136,0,1,2022-10-15 17:48:43+00:00,340000008JR7,JR Hospital West Wing (Stop J7),0
9193,0,1,2022-10-15 17:49:03+00:00,340000008JR7,JR Hospital West Wing (Stop J7),1
9250,1,2,2022-10-15 17:49:44+00:00,340000008JR7,JR Hospital West Wing (Stop J7),0
9745,1,2,2022-10-15 17:54:07+00:00,340000008JR7,JR Hospital West Wing (Stop J7),1
9799,1,2,2022-10-15 17:54:27+00:00,340000008JR7,JR Hospital West Wing (Stop J7),2
...,...,...,...,...,...,...
9801,14,2,2022-10-15 17:54:26+00:00,340001201OPP,Clive Road,1
10208,15,2,2022-10-15 17:58:09+00:00,340001257BTW,The Original Swan,0
10266,15,2,2022-10-15 17:58:29+00:00,340001257BTW,The Original Swan,1
10382,16,2,2022-10-15 17:59:30+00:00,340001255OPP,Templars Square (Stop D),0


In [14]:
def diff_calc(x):
    return x.iloc[-1] - x.iloc[0]

In [15]:
stop_time = pd.DataFrame(
    jnd.groupby(["route_id", "trip_id", "stop_id", "stop_name"])
    .timestamp.agg(diff_calc)
    .astype("timedelta64[s]")
)
stop_time.sort_values(by="timestamp", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,timestamp
route_id,trip_id,stop_id,stop_name,Unnamed: 4_level_1
3815,VJ9db06ecf8f62a60722619a5e3102160b105d772a,340001230OPP,Sandy Lane West,893.0
4824,VJ42fa629e675cecbbb3b7a87f5c1f8f70d9ecf525,340001989S2,Speedwell Street (Stop S2),428.0
4824,VJ00044550d9264cd702671f3d707d4fe37f8e7975,340000008JR7,JR Hospital West Wing (Stop J7),344.0
14187,VJ6ec84b5fa701ce06046d0c4997b01b77514eb540,340002074G5,St Aldates (Stop G5),331.0
4824,VJea161b21514f939fc2df3e4bd2370763b879d24f,340000008JR7,JR Hospital West Wing (Stop J7),323.0
...,...,...,...,...
3815,VJa40c34a692aefb9b68d7fa0a8c2c1062ac24b93c,340003333MAR,Stockmore Street,0.0
3815,VJa40c34a692aefb9b68d7fa0a8c2c1062ac24b93c,340001989S1,Speedwell Street (Stop S1),0.0
3815,VJa40c34a692aefb9b68d7fa0a8c2c1062ac24b93c,340001126CIR,The Plain,0.0
3815,VJa40c34a692aefb9b68d7fa0a8c2c1062ac24b93c,340000009R9,Park End Street (Stop D6),0.0
