# Visualizing Real Time Performance of a GTFS Feed

This map shows the [Big Blue Bus](http://gtfs.bigbluebus.com/) GTFS feed and computes some basic real-time performance stats, i.e., whether each bus is on time.

It uses the following elements from GTFS-RT and GTFS-Schedule:
* Vehicle Positions 
* Trip Updates 
* Stop Times 

If a trip is green, the predicted arrival time at its next scheduled stop is on or before the scheduled time. If a trip is red, the opposite is true: the trip is behind schedule. Blue trips are not scheduled, i.e., they are extra buses or other unscheduled service.

In [1]:
import pydeck as pdk
import pandas as pd
import geopandas as gpd
import time
import asyncio
from IPython.display import display
import altair as alt

In [9]:
# Evaluate asyncio.all_tasks without calling it, so the cell output is the
# function object itself rather than a set of tasks.
asyncio.all_tasks

<function asyncio.tasks.all_tasks(loop=None)>

In [2]:
def colorize(val):
    """
    Pick the RGB fill color for a trip from its lateness flag.

    Red when the flag equals True (late), green when it equals False
    (on time), and light blue for any other value (unscheduled).
    """
    # Compare by equality rather than truthiness: any value that equals
    # neither True nor False must fall through to the light-blue default.
    if val == True:
        return [255, 0, 0]
    return [0, 255, 0] if val == False else [60, 220, 255]

In [3]:
from google.transit import gtfs_realtime_pb2
import requests

_VEHICLE_POSITIONS_URL = 'http://gtfs.bigbluebus.com/vehiclepositions.bin'
_TRIP_UPDATES_URL = 'http://gtfs.bigbluebus.com/tripupdates.bin'
_STOP_TIMES_URL = "http://gtfs.bigbluebus.com/parsed/stop_times.txt"
_AGENCY_TZ = 'America/Los_Angeles'
_REQUEST_TIMEOUT_S = 30


def _fetch_feed(url):
    """Download one GTFS-realtime protobuf feed and return the parsed FeedMessage."""
    # A timeout keeps a stalled connection from hanging the caller forever, and
    # raise_for_status stops us from trying to parse an HTTP error page.
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_S)
    response.raise_for_status()
    feed = gtfs_realtime_pb2.FeedMessage()
    feed.ParseFromString(response.content)
    return feed


def _vehicle_positions():
    """Return one row (lat, long, trip_id) per entity in the vehicle positions feed."""
    feed = _fetch_feed(_VEHICLE_POSITIONS_URL)
    locs = [
        {
            'lat': entity.vehicle.position.latitude,
            'long': entity.vehicle.position.longitude,
            'trip_id': entity.vehicle.trip.trip_id,
        }
        for entity in feed.entity
    ]
    # Explicit columns keep the frame well-formed even when the feed is empty.
    return pd.DataFrame(locs, columns=['lat', 'long', 'trip_id'])


def _next_arrival_predictions():
    """Return the first predicted arrival (trip_id, pred_time, stop_id) for each trip update."""
    feed = _fetch_feed(_TRIP_UPDATES_URL)
    rt_preds_next = []
    for entity in feed.entity:
        trip_id = entity.trip_update.trip.trip_id
        for update in entity.trip_update.stop_time_update:
            arrival_ts = update.arrival.time
            # An unset arrival reads as 0 (the Unix epoch). Skip those instead
            # of filtering on a hard-coded calendar year.
            if arrival_ts > 0:
                rt_preds_next.append(
                    {'trip_id': trip_id, 'pred_time': arrival_ts, 'stop_id': update.stop_id}
                )
                break
    preds_df = pd.DataFrame(rt_preds_next, columns=['trip_id', 'pred_time', 'stop_id'])
    # Feed timestamps are epoch seconds; keep them timezone-aware (UTC).
    preds_df['pred_time'] = pd.to_datetime(
        preds_df['pred_time'].astype('int64'), unit='s', utc=True
    )
    # NOTE(review): the int64 casts assume purely numeric trip/stop ids, as the
    # original code did -- confirm against the feed.
    return preds_df.astype({'trip_id': 'int64', 'stop_id': 'int64'})


def _flag_late_trips(preds_df):
    """Return (running_late, trip_id) rows for predictions that match a scheduled stop time."""
    scheduled_stop_time = pd.read_csv(_STOP_TIMES_URL)
    schedule = scheduled_stop_time[['trip_id', 'arrival_time', 'stop_id']]
    new_df = pd.merge(schedule, preds_df, how='inner', on=['trip_id', 'stop_id'])
    if new_df.empty:
        return pd.DataFrame({
            'running_late': pd.Series(dtype='bool'),
            'trip_id': pd.Series(dtype='int64'),
        })

    # Work in the agency's wall-clock time, independent of the machine's
    # own date and timezone.
    pred_wall = new_df['pred_time'].dt.tz_convert(_AGENCY_TZ).dt.tz_localize(None)
    pred_offset = pred_wall - pred_wall.dt.normalize()
    # Scheduled arrival_time is an HH:MM:SS offset that may be 24:00:00 or
    # later for trips running past midnight, so parse it as a duration.
    # NOTE(review): assumes the column holds plain "H:MM:SS" strings -- confirm.
    sched_offset = pd.to_timedelta(new_df['arrival_time'])
    delay = pred_offset - sched_offset
    # Fold the difference into +/-12 hours so after-midnight trips are compared
    # against the correct service day.
    one_day = pd.Timedelta(days=1)
    half_day = pd.Timedelta(hours=12)
    delay = delay.mask(delay < -half_day, delay + one_day)
    delay = delay.mask(delay > half_day, delay - one_day)
    # Rows with a missing scheduled time compare as False (not late).
    new_df['running_late'] = delay > pd.Timedelta(0)
    return new_df[['running_late', 'trip_id']]


def update_df():
    """
    Get the latest vehicle positions and lateness flags from the Big Blue Bus feeds.

    Downloads the vehicle positions and trip updates GTFS-realtime feeds and
    the scheduled stop times, takes the first predicted arrival of each trip
    update, and compares it with the scheduled arrival at the same stop.

    Returns a DataFrame with one row per vehicle and the columns trip_id,
    lat, long, running_late and color. running_late is True/False for
    vehicles whose trip matched a scheduled stop time and the string
    'unscheduled' otherwise; color is the RGB list produced by colorize.

    Raises whatever the HTTP download, protobuf parsing or CSV read raise,
    including requests.HTTPError for a non-success HTTP status.
    """
    # Vehicle Positions
    df = _vehicle_positions()
    # Trip Updates, then DETERMINE IF A TRIP IS RUNNING LATE
    late_df = _flag_late_trips(_next_arrival_predictions())
    # MERGE with vehicle positions
    map_df = df.astype({'trip_id': 'int64'}).set_index('trip_id').join(late_df.set_index('trip_id'))
    map_df['running_late'] = map_df.running_late.fillna('unscheduled')
    map_df['color'] = map_df.running_late.apply(colorize)
    return map_df.reset_index()

In [4]:
# Load the route geometries. Per the original note, this GeoJSON file was
# created with a separate utility.
gdf = gpd.read_file('../data/bigblue/bigblue.geojson')

def hex_to_rgb(h):
    """
    Convert a six-digit hex color string (leading '#' optional) into an
    (r, g, b) tuple of ints.
    """
    digits = h.lstrip("#")
    red, green, blue = (int(digits[start:start + 2], 16) for start in range(0, 6, 2))
    return (red, green, blue)

def make_path(geom):
    """
    Turn a geometry into a list of [x, y] coordinate pairs, read from the
    geometry's ``xy`` attribute.
    """
    xs, ys = geom.xy[0], geom.xy[1]
    return [[x, y] for x, y in zip(xs, ys)]

# Replace each hex route color with its RGB tuple, and add a "path" column
# holding every geometry's coordinates as a list of [x, y] pairs.
gdf["route_color"] = gdf["route_color"].apply(hex_to_rgb)
gdf["path"] = gdf.geometry.apply(make_path)

In [5]:
# Take an initial snapshot of the feed by calling update_df once.
map_df = update_df()

In [6]:
# Show how many vehicles fall into each running_late category.
display("Number of Vehicles Running Late")
print(map_df.running_late.value_counts())

'Number of Vehicles Running Late'

unscheduled    50
Name: running_late, dtype: int64


In [7]:
# Initial camera position and zoom for the map.
view_state = pdk.ViewState(latitude=34.04545, longitude=-118.44676, zoom=10)

# Static layer: the route shapes, drawn in each route's own color.
line_layer = pdk.Layer(
    "PathLayer",
    gdf,
    get_color="route_color",
    width_scale=10,
    width_min_pixels=2,
    get_path="path",
    get_width=5,
    pickable=False,
)

# Live layer: one dot per vehicle, filled with its lateness color.
realtime_layer = pdk.Layer(
    type="ScatterplotLayer",
    data=map_df,
    get_position=["long", "lat"],
    get_fill_color="color",
    get_radius=75,
    pickable=True,
)

# Hover text for the pickable vehicle dots.
tooltip_spec = {
    "html": "<b>Running Late?:</b> {running_late}<br><b>Trip ID:</b> {trip_id}"
}

r = pdk.Deck(
    layers=[line_layer, realtime_layer],
    initial_view_state=view_state,
    tooltip=tooltip_spec,
)
r.show()

DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{"initialViewState": {"lat…

In [9]:
# Stop only the refresh loop started by an earlier run of this cell.
# Cancelling every task returned by asyncio.all_tasks() would also cancel
# tasks this cell did not create.
_previous_refresh = globals().get("refresh_task")
if _previous_refresh is not None:
    _previous_refresh.cancel()


async def do():
    """
    Refresh the map and the lateness bar chart every 30 seconds, forever.

    Each pass calls update_df, charts the fresh records, pushes them into
    realtime_layer and asks the deck to re-render. A failed refresh is
    reported and retried on the next pass instead of ending the loop.
    """
    while True:
        try:
            records = update_df()
        except Exception as err:
            # Loop boundary: report the failure and try again next pass.
            display(f"update failed, will retry: {err!r}")
        else:
            # Chart the records just fetched, not the initial snapshot.
            display(alt.Chart(records).mark_bar().encode(
                x='running_late',
                y='count(running_late)'
            ))
            realtime_layer.data = records
            r.update()
            display("waited 30 seconds for updates")
        await asyncio.sleep(30)


# Keep a handle on the task so the next run of this cell can cancel it.
refresh_task = asyncio.create_task(do())

'waited 30 seconds for updates'