In [1]:
import pandas as pd
from datetime import datetime
from zipfile import ZipFile
import requests
import io
import glob
from bokeh.io import output_notebook, show
from bokeh.models import (
    ColumnDataSource,
    CategoricalColorMapper,
    FactorRange,
    Legend,
    LegendItem,
    GMapOptions,
    HoverTool,
)
from bokeh.transform import transform, dodge
from bokeh.plotting import figure, gmap
from ipywidgets import Layout, Box, interactive
import ipywidgets as widgets
from IPython.display import display

In [2]:
class Feed:
    """A GTFS feed loaded into memory, one pandas DataFrame per table.

    Every top-level ``<name>.txt`` member of the downloaded zip archive becomes
    an attribute ``<name>`` on the instance (for example ``feed.trips`` for
    ``trips.txt``).
    """

    def __init__(self, url):
        """Download the zip archive at ``url`` and load its ``.txt`` tables.

        The archive is read directly from memory; nothing is written to disk,
        so tables left over from a previously loaded feed can never leak into
        this one.

        Args:
            url: HTTP(S) location of a zipped GTFS feed.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            zipfile.BadZipFile: if the response body is not a zip archive.
        """
        res = requests.get(url)
        # Fail loudly on 4xx/5xx instead of trying to unzip an error page.
        res.raise_for_status()
        with ZipFile(io.BytesIO(res.content), "r") as archive:
            for member in archive.namelist():
                # Only top-level text tables are loaded; nested entries and
                # other file types are ignored.
                if "/" in member or not member.endswith(".txt"):
                    continue
                table = member.split(".")[0]
                with archive.open(member) as handle:
                    setattr(self, table, pd.read_csv(handle, low_memory=False))
        

In [3]:
# Table of archived MBTA GTFS feeds, fetched over the network on every run.
# The `archive_url` column of this table is what gets passed to `Feed`.
all_feeds = pd.read_csv("https://cdn.mbta.com/archive/archived_feeds.txt")

In [4]:
# Load the feed listed in the first row of the archive table.
# NOTE(review): presumably row 0 is the most recent feed — confirm against the archive file.
feed = Feed(all_feeds.loc[0].archive_url)

In [5]:
def datestr_to_date(datestr):
    """Parse a ``YYYYMMDD`` value into a ``datetime``.

    The value is passed through ``str`` first, so integers (as produced when
    a date column is read as a number) are accepted as well as strings.

    Raises:
        ValueError: if the text does not match ``%Y%m%d``.
    """
    as_text = str(datestr)
    return datetime.strptime(as_text, "%Y%m%d")

def get_date_status(feed, date, service_id):
    """Classify whether ``service_id`` runs on a given day.

    Args:
        feed: object exposing the GTFS tables ``calendar`` and
            ``calendar_dates`` as DataFrames (either may be absent).
        date: any object with a ``date`` attribute convertible by
            ``pd.to_datetime`` (in this notebook, a DataFrame row).
        service_id: the service to look up.

    Returns:
        ``"Added"`` or ``"Removed"`` when ``calendar_dates`` holds an
        exception for that service and day (``exception_type`` 1 means added,
        anything else removed); otherwise ``"Scheduled"`` when the day lies
        inside the service's ``start_date``..``end_date`` window and its
        weekday flag is set; otherwise ``"Not Scheduled"``.
    """
    weekday_columns = (
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    )
    date = pd.to_datetime(date.date)
    datestr = date.strftime("%Y%m%d")

    # Exceptions override the weekly pattern, so they are checked first.
    exceptions = getattr(feed, "calendar_dates", None)
    if exceptions is not None:
        # read_csv usually parses YYYYMMDD as integers; compare as text so the
        # lookup matches whether the column holds ints or strings.
        matches = exceptions.loc[
            (exceptions["service_id"] == service_id)
            & (exceptions["date"].astype(str) == datestr),
            "exception_type",
        ]
        if not matches.empty:
            return "Added" if matches.iloc[0] == 1 else "Removed"

    calendar = getattr(feed, "calendar", None)
    if calendar is None:
        return "Not Scheduled"
    rules = calendar.loc[calendar["service_id"] == service_id]
    if rules.empty:
        # Service defined only through calendar_dates: no weekly pattern.
        return "Not Scheduled"

    rule = rules.iloc[0]
    start_date = datestr_to_date(rule["start_date"])
    end_date = datestr_to_date(rule["end_date"])
    runs_today = start_date <= date <= end_date and bool(
        rule[weekday_columns[date.weekday()]]
    )
    return "Scheduled" if runs_today else "Not Scheduled"


def get_feed_date_range(feed):
    """Return a daily ``DatetimeIndex`` covering the feed's validity window.

    The bounds are read from the ``feed_start_date`` and ``feed_end_date``
    columns of the row labelled ``0`` in ``feed.feed_info``; both ends are
    included.
    """
    info = feed.feed_info
    first_day, last_day = (
        datestr_to_date(info.loc[0, column])
        for column in ("feed_start_date", "feed_end_date")
    )
    return pd.date_range(first_day, last_day)


def service_calmap(feed, service_id):
    """Build a calendar-style Bokeh figure showing when a service runs.

    Each day in the feed's date range becomes one cell, placed by
    (year, month, ISO week) on the x axis and weekday on the y axis, coloured
    by the status returned from ``get_date_status`` and labelled with the day
    of the month.

    Args:
        feed: loaded feed, passed through to ``get_feed_date_range`` and
            ``get_date_status``.
        service_id: service whose calendar is drawn; also used as the title.

    Returns:
        The Bokeh figure. The caller is responsible for ``show``-ing it.

    Side effects:
        Calls ``output_notebook()`` so that a later ``show`` renders inline.
    """
    df = pd.DataFrame({"date": get_feed_date_range(feed)})

    # Derive the text columns in one vectorised pass each.
    column_formats = (
        ("year", "%Y"),
        ("month", "%b"),
        ("dow", "%a"),
        ("day", "%d"),
        ("week_number", "%V"),
    )
    for column, fmt in column_formats:
        df[column] = df["date"].dt.strftime(fmt)

    # The dates are chronological here, so the first occurrence of every
    # (year, month, week) triple already gives the x-axis factors in date
    # order. This must be captured before sort_index() reorders the rows
    # lexicographically.
    x_range = list(dict.fromkeys(zip(df["year"], df["month"], df["week_number"])))

    df = df.set_index(["year", "month", "week_number"]).sort_index()

    df["status"] = df.apply(lambda row: get_date_status(feed, row, service_id), axis=1)
    df["status"] = df["status"].astype("category")

    output_notebook()

    # The three index levels are exposed to glyphs as the single column
    # "year_month_week_number", which is what the x fields below refer to.
    source = ColumnDataSource(df)

    colors = ["#F7F7FF", "#545E75", "#3F826D", "#A63D40"]
    mapper = CategoricalColorMapper(
        palette=colors, factors=["Not Scheduled", "Scheduled", "Added", "Removed"]
    )

    p = figure(
        plot_width=800,
        plot_height=300,
        x_range=FactorRange(*x_range, group_padding=0),
        # Reversed so that Monday ends up at the top of the chart.
        y_range=["Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon"],
        toolbar_location=None,
        tools="",
        x_axis_location="above",
    )

    rect = p.rect(
        x="year_month_week_number",
        y="dow",
        width=1,
        height=1,
        source=source,
        line_color="black",
        fill_color=transform("status", mapper),
    )

    # Day-of-month label centred inside each cell.
    text_props = {"source": source, "text_align": "center", "text_baseline": "middle"}
    x = dodge("year_month_week_number", 0, range=p.x_range)
    r = p.text(x=x, y="dow", text="day", **text_props)
    r.glyph.text_font_style = "bold"
    r.glyph.text_font_size = "10px"

    # Strip axis decoration; the week-level tick labels are hidden entirely.
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.xaxis.major_label_text_font_size = "0px"
    p.axis.axis_label_standoff = 0
    p.xaxis.major_label_orientation = 1.0
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.title.text = service_id

    legend = Legend(items=[LegendItem(label={"field": "status"}, renderers=[rect])])
    p.add_layout(legend, "right")
    return p


def trip_geoplot(feed, trip_id, api_key=None):
    """Plot one trip's shape and stops on a Google Maps base layer.

    Args:
        feed: loaded feed exposing ``trips``, ``shapes``, ``stop_times`` and
            ``stops`` DataFrames.
        trip_id: the trip to draw.
        api_key: Google Maps API key. When omitted, the ``GOOGLE_API_KEY``
            environment variable is used, falling back to the key borrowed
            from the transitfeed schedule viewer.

    Returns:
        The Bokeh ``gmap`` figure with hover tools for stops and shape points.

    Raises:
        KeyError: if ``trip_id`` is not present in ``feed.trips``.
    """
    import os

    # Map centre used when the trip has no usable stop coordinates.
    default_lat, default_lng = 42.3624074, -71.0616622

    trips = feed.trips.set_index("trip_id")
    shape_id = trips.loc[trip_id].shape_id

    # Draw the polyline in sequence order; the row order of the shapes table
    # is not relied upon.
    shape_pts = feed.shapes.loc[feed.shapes.shape_id == shape_id].sort_values(
        "shape_pt_sequence"
    )
    stop_times = feed.stop_times[feed.stop_times.trip_id == trip_id].sort_values(
        "stop_sequence"
    )
    stops = feed.stops.loc[:, ["stop_id", "stop_lat", "stop_lon", "stop_name"]]
    stops = stop_times.merge(stops, on="stop_id")

    # Centre the map on the trip itself so routes far from the default
    # location are visible without panning.
    center_lat, center_lng = stops["stop_lat"].mean(), stops["stop_lon"].mean()
    if pd.isna(center_lat) or pd.isna(center_lng):
        center_lat, center_lng = default_lat, default_lng
    map_options = GMapOptions(
        lat=float(center_lat), lng=float(center_lng), map_type="roadmap", zoom=12
    )

    if api_key is None:
        # Borrowing the Google API key used by schedule viewer
        # https://github.com/google/transitfeed/blob/23038a96608a45ac74d2f4754ab9b5b39df8c395/schedule_viewer.py#L537
        api_key = os.environ.get("GOOGLE_API_KEY", "AIzaSyAZTTRO6RC6LQyKCD3JODhxbClsZl95P9U")

    stop_tooltips = [
        ("Stop", "@stop_name"),
        ("Sequence", "@stop_sequence"),
        ("Time", "@departure_time"),
        ("Lat", "@stop_lat"),
        ("Lng", "@stop_lon"),
    ]

    shape_tooltips = [("Lat", "@shape_pt_lat"), ("Lng", "@shape_pt_lon")]

    p = gmap(api_key, map_options)
    stop_circles = p.circle(
        x="stop_lon", y="stop_lat", size=10, fill_color="blue", fill_alpha=0.8, source=stops
    )
    shape_circles = p.circle(
        x="shape_pt_lon",
        y="shape_pt_lat",
        size=1,
        fill_color="grey",
        fill_alpha=0.4,
        source=shape_pts,
    )
    p.line(x="shape_pt_lon", y="shape_pt_lat", source=shape_pts)
    # Separate hover tools so stops and shape points get their own tooltips.
    p.add_tools(HoverTool(tooltips=stop_tooltips, renderers=[stop_circles]))
    p.add_tools(HoverTool(tooltips=shape_tooltips, renderers=[shape_circles]))
    return p


def route_variant_time_table(feed, service_id, route_id, route_pattern_id):
    """Build a stop-by-trip timetable for one route pattern on one service.

    Args:
        feed: loaded feed exposing ``trips``, ``stop_times`` and ``stops``.
        service_id, route_id, route_pattern_id: select the trips to include;
            all three must match.

    Returns:
        DataFrame indexed by ``(stop_sequence, stop_name, stop_id)`` with one
        column per ``trip_id`` holding that trip's ``departure_time`` at the
        stop (NaN where the trip has no time there). Rows are in index order;
        columns are ordered by each trip's earliest departure time.

    Raises:
        ValueError: if a trip lists the same (stop_sequence, stop) twice.
    """
    trips = feed.trips
    trip_ids = trips.loc[
        (trips.service_id == service_id)
        & (trips.route_id == route_id)
        & (trips.route_pattern_id == route_pattern_id),
        "trip_id",
    ]

    stop_times = feed.stop_times.loc[
        feed.stop_times.trip_id.isin(trip_ids),
        ["trip_id", "stop_id", "stop_sequence", "departure_time"],
    ]
    long = stop_times.merge(feed.stops.loc[:, ["stop_id", "stop_name"]], on="stop_id")

    # One row per stop visit, one column per trip. unstack() also sorts the
    # row index, so stops come out in stop_sequence order.
    table = long.set_index(["stop_sequence", "stop_name", "stop_id", "trip_id"])[
        "departure_time"
    ].unstack("trip_id")

    # Order trips by their earliest departure. Sorting the long table puts
    # missing times last, so this does not depend on any particular
    # stop_sequence value existing or on a trip serving the first stop.
    # Times are compared as text, as before.
    column_order = (
        long.sort_values("departure_time", kind="mergesort")
        .drop_duplicates("trip_id")["trip_id"]
        .tolist()
    )
    return table[column_order]


def visualize_feed(feed):
    """Assemble the interactive feed explorer.

    Four chained dropdowns are created: route -> service -> route variant ->
    example trip. Choosing a value repopulates the next dropdown and renders
    the matching view (service calendar, timetable, trip map) into that
    dropdown's own output area.

    Args:
        feed: loaded feed exposing ``routes``, ``trips``, ``stop_times``,
            ``calendar`` and ``calendar_attributes``.

    Returns:
        An ipywidgets ``Box`` holding the four interactive controls.
    """
    items_layout = Layout(min_width="50%")
    box_layout = Layout(display="flex", flex_flow="wrap row", width="100%")

    calendar = feed.calendar.merge(feed.calendar_attributes, on="service_id")

    route_dropdown = widgets.Dropdown(
        options=list(zip(feed.routes["route_long_name"], feed.routes["route_id"])),
        description="Route:",
    )
    service_dropdown = widgets.Dropdown(description="Service:")
    route_variant_dropdown = widgets.Dropdown(description="Route Variant:")
    trip_dropdown = widgets.Dropdown(description="Example Trip:")

    def set_options(dropdown, options, refresh):
        """Replace ``dropdown``'s options and keep its dependent view fresh.

        Assigning options only notifies value observers when the selected
        value actually changes. If the new list happens to select the same
        value as before, ``refresh`` is called so the dependent view is
        recomputed against the new upstream selection.
        """
        previous = dropdown.value
        dropdown.options = options
        if dropdown.value == previous:
            refresh()

    def update_route(route):
        route_services = feed.trips.loc[feed.trips.route_id == route, "service_id"].unique()
        services = calendar[calendar.service_id.isin(route_services)]
        options = [
            (
                f"{cal['service_id']}: {cal['service_description']}"
                f" ({cal['start_date']} - {cal['end_date']})",
                cal["service_id"],
            )
            for _, cal in services.iterrows()
        ]
        set_options(service_dropdown, options, lambda: interactive_service_dropdown.update())

    def update_service(service):
        trips = (
            feed.trips.loc[
                (feed.trips.service_id == service) & (feed.trips.route_id == route_dropdown.value),
                ["trip_id", "route_pattern_id", "trip_headsign", "route_id"],
            ]
            .groupby("route_pattern_id")
            .first()
        )
        options = [
            (f"{t['route_id']} ({pattern_id}): {t['trip_headsign']}", (t["route_id"], pattern_id))
            for pattern_id, t in trips.iterrows()
        ]
        set_options(
            route_variant_dropdown, options, lambda: interactive_route_variant_dropdown.update()
        )
        # Nothing to draw while no service is selected.
        if service is not None:
            show(service_calmap(feed, service))

    def update_route_variant(variant):
        if variant is None:
            # No variant available for the current route/service.
            trip_dropdown.options = []
            return
        route_id, route_pattern_id = variant
        trips = feed.trips.loc[
            (
                (feed.trips.service_id == service_dropdown.value)
                & (feed.trips.route_id == route_id)
                & (feed.trips.route_pattern_id == route_pattern_id)
            )
        ]
        # First stop_times row per trip after sorting by departure_time.
        stop_times = (
            feed.stop_times.loc[feed.stop_times.trip_id.isin(trips.trip_id)]
            .sort_values("departure_time")
            .groupby("trip_id")
            .first()
        )
        trips = trips.merge(stop_times, on="trip_id").sort_values("departure_time")
        trip_dropdown.options = [
            (f"{t['trip_id']} ({t['departure_time']})", t["trip_id"])
            for _, t in trips.iterrows()
        ]
        display(route_variant_time_table(feed, service_dropdown.value, route_id, route_pattern_id))

    def update_trip(trip):
        if trip:
            show(trip_geoplot(feed, trip))

    interactive_route_dropdown = interactive(update_route, route=route_dropdown)
    interactive_service_dropdown = interactive(update_service, service=service_dropdown)
    interactive_route_variant_dropdown = interactive(
        update_route_variant, variant=route_variant_dropdown
    )
    interactive_trip_dropdown = interactive(update_trip, trip=trip_dropdown)

    return Box(
        children=[
            Box([interactive_route_dropdown]),
            Box([interactive_service_dropdown]),
            Box([interactive_route_variant_dropdown], layout=items_layout),
            Box([interactive_trip_dropdown], layout=items_layout),
        ],
        layout=box_layout,
    )


In [6]:
# Build the explorer for the loaded feed; as the cell's last expression, the returned Box is displayed.
visualize_feed(feed)

Box(children=(Box(children=(interactive(children=(Dropdown(description='Route:', options=(('Red Line', 'Red'),…