In [5]:
import pandas as pd
import altair as alt

In [63]:
# Stops used for the Green Line E branch subset.
# NOTE(review): the displayed rows include "Longwood" (stop_id place-longw);
# the E branch station is presumably "Longwood Medical Area" -- confirm which
# stop is intended before relying on per-stop comparisons.
e_branch_stops = [
    "Heath Street", "Back of the Hill", "Riverway", "Mission Park",
    "Fenwood Road", "Brigham Circle", "Longwood", "Museum of Fine Arts",
    "Northeastern University", "Symphony", "Prudential", "Copley",
]

mbta = pd.read_csv("Downloads/MBTA_Line_and_Stop.csv")
mbta = mbta.loc[mbta["route_name"] == "Green Line"]

# Boolean mask over the Green Line rows: True where the stop is in the list above.
# "Museum of Fine Arts" was previously misspelled, so isin() never matched it
# and that stop silently dropped out of the subset.
new = mbta["stop_name"].isin(e_branch_stops)
mbta = mbta[new]
# By narrowing our dataset, we can make it easier to look at in Altair. Rather than using hundreds of stops,
# we can focus on the E branch of the MBTA green line.

Unnamed: 0,FID,mode,season,route_id,route_name,direction_id,day_type_id,day_type_name,time_period_id,time_period_name,stop_name,stop_id,total_ons,total_offs,number_service_days,average_ons,average_offs,average_flow
3,4,0,Fall 2019,Green,Green Line,0,day_type_01,weekday,time_period_01,VERY_EARLY_MORNING,Back of the Hill,place-bckhl,0,36,77,0,0,4
11,12,0,Fall 2019,Green,Green Line,0,day_type_01,weekday,time_period_01,VERY_EARLY_MORNING,Brigham Circle,place-brmnl,1,1868,77,0,24,11
20,21,0,Fall 2019,Green,Green Line,0,day_type_01,weekday,time_period_01,VERY_EARLY_MORNING,Copley,place-coecl,1414,8290,77,18,108,160
26,27,0,Fall 2019,Green,Green Line,0,day_type_01,weekday,time_period_01,VERY_EARLY_MORNING,Fenwood Road,place-fenwd,0,145,77,0,2,9
32,33,0,Fall 2019,Green,Green Line,0,day_type_01,weekday,time_period_01,VERY_EARLY_MORNING,Heath Street,place-hsmnl,0,290,77,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4368,4369,0,Fall 2017,Green,Green Line,1,day_type_01,weekday,time_period_09,NIGHT,Heath Street,place-hsmnl,44,0,82,1,0,1
4376,4377,0,Fall 2017,Green,Green Line,1,day_type_01,weekday,time_period_09,NIGHT,Longwood,place-longw,100,30,82,1,0,11
4379,4380,0,Fall 2017,Green,Green Line,1,day_type_01,weekday,time_period_07,EVENING,Back of the Hill,place-bckhl,700,52,82,9,1,39
4387,4388,0,Fall 2017,Green,Green Line,1,day_type_01,weekday,time_period_07,EVENING,Brigham Circle,place-brmnl,22077,403,82,269,5,383


In [96]:
# Each chart needs a selection of its own: `interval` is attached to the
# scatter plot, `interval2` to the line chart and `selector` to the stop picker.
# The three brushes are created in this order, one call per name.
interval, interval2, selector = [alt.selection_interval() for _ in range(3)]

In [131]:
# Scatter plot: one point per row of `mbta`, riders entering (x) against
# riders exiting (y). The title previously repeated the word "exiting".
base = (
    alt.Chart(
        mbta,
        width=350,
        title="Riders entering and exiting stops on the MBTA Green Line E branch",
    )
    .mark_point()
    .encode(
        # X is total ons and Y is total offs
        x=alt.X("total_ons:Q", axis=alt.Axis(title="Total riders entering")),
        y=alt.Y("total_offs:Q", axis=alt.Axis(title="Total riders exiting")),
        # Points inside the `interval` brush are colored by stop; all others are light gray.
        color=alt.condition(interval, "stop_name", alt.value("lightgray")),
        # Hovering a point shows which stop / season / time period the record belongs to.
        tooltip=alt.Tooltip(["stop_name", "season", "average_flow", "day_type_name", "time_period_name"]),
    )
    # This chart owns the `interval` brush and is itself filtered by the
    # line chart's brush (`interval2`) and the stop picker's brush (`selector`).
    .add_selection(interval)
    .transform_filter(interval2)
    .transform_filter(selector)
)

# Line chart: mean of `average_flow` (y) for each season (x).
line = (
    alt.Chart(mbta, title="Average rider flow from 2017-2019", width=200)
    .mark_line()
    .encode(
        x=alt.X("season:N", axis=alt.Axis(title="Season")),
        y=alt.Y("mean(average_flow)", axis=alt.Axis(title="Average rider flow")),
    )
    # Owns the `interval2` brush; only rows selected in the scatter plot
    # (`interval`) and in the stop picker (`selector`) feed the mean.
    .add_selection(interval2)
    .transform_filter(interval)
    .transform_filter(selector)
)

# Point markers using the same x/y fields as the line chart, with a tooltip
# so the mean flow value can be read off on hover.
points = (
    alt.Chart(mbta, width=200)
    .mark_point()
    .encode(
        x="season:N",
        y=alt.Y("mean(average_flow)"),
        tooltip=alt.Tooltip(["mean(average_flow)"]),
    )
    # Same two filters as the line chart, so both marks aggregate the same rows.
    .transform_filter(interval)
    .transform_filter(selector)
)

# Stop picker: a narrow strip with one row of points per stop name. Brushing
# here sets `selector`, which the other three charts use as a filter.
stop_selector = (
    alt.Chart(mbta, width=50)
    .mark_point()
    .encode(
        # The x axis carries no title and its labels are fully transparent;
        # FID appears to serve only to spread the points horizontally.
        x=alt.X("FID", axis=alt.Axis(title="", labelOpacity=0)),
        y=alt.Y("stop_name:N", axis=alt.Axis(title="Stop name")),
        # Brushed points are colored by stop; everything else is light gray.
        color=alt.condition(selector, "stop_name", alt.value("lightgray")),
    )
    .add_selection(selector)
)

# Left to right: stop picker, scatter plot, then the line chart with its point
# markers layered on top. `+` binds tighter than `|`; the parentheses only make that explicit.
stop_selector | base | (line + points)

When I saw the data set, I wanted to look at ridership specifically on the Green Line, because that's the line I use most to get to and from Northeastern's campus. After filtering the data to include only Green Line E branch stops, I plotted riders entering against riders exiting in a scatterplot, with one point for each instance of data collection, and showed the average flow by season in a line chart. The two charts are linked by brushing, so a selection made in one filters the other.

What I found from these graphs is that ridership decreased over time, and that Copley is much busier than any other stop, most likely because it also connects to the other branches of the Green Line.

The pop-out effect I used is in the stop selector part of the graph. To explore a stop, brush across the entire row of points for that stop; the selected points keep their color while the rest turn gray, and the other graphs then show only that stop.