# 🚇 Green Line Extension Ridership Map

Interactive metro-style visualization showing average daily ridership by station across 2023-2025.

**Features:**
- Stacked circles per station (one per year)
- Circle size = ridership volume
- Dropdown to switch between yearly average and individual months

In [93]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

In [94]:
# Load the ridership export, keep only the columns used below (the
# `highbound_avg_daily_ons` figure is the ridership measure), and give
# them short names.
df = (
    pd.read_csv('GLX_Ridership_Total_Monthly_and_Avg_Daily__R001475-120525_.csv')
    [['year', 'month', 'stop_name', 'highbound_avg_daily_ons']]
    .rename(columns={
        'stop_name': 'station',
        'highbound_avg_daily_ons': 'ridership',
    })
)

# Quick sanity check of what was loaded
print(f"Data shape: {df.shape}")
print(f"Years: {sorted(df['year'].unique())}")
print(f"Stations: {df['station'].unique()}")

Data shape: (396, 4)
Years: [np.int64(2023), np.int64(2024), np.int64(2025)]
Stations: ['Ball Square' 'East Somerville' 'Gilman Square' 'Government Center'
 'Haymarket' 'Lechmere' 'Magoun Square' 'Medford/Tufts' 'North Station'
 'Science Park/West End' 'Union Square']


In [116]:
# Station positions for the metro-style layout, as (x, y) pairs.
# The dict is written in line order (trunk first, then the branches), so it
# doubles as the display order. The downtown stations that also appear in the
# data (Government Center, Haymarket, North Station) are deliberately left out.
STATION_POSITIONS = {
    'Science Park/West End': (0, 0),
    'Lechmere': (1.5, 0),
    'Union Square': (3, -1.2),      # Union Square branch, drawn below the main line
    'East Somerville': (3, 0),
    'Gilman Square': (4.5, 0),
    'Magoun Square': (6, 0),
    'Ball Square': (7.5, 0),
    'Medford/Tufts': (9, 0),
}

# Station order following the GLX line geography (same order as the layout above)
STATION_ORDER = list(STATION_POSITIONS)

# One colour per year; the years themselves are read off this mapping
YEAR_COLORS = {
    2023: '#9B59B6',  # Purple
    2024: '#00AD8B',  # Teal
    2025: '#F39C12',  # Orange
}
YEARS = list(YEAR_COLORS)

# Dropdown entries: key 0 is the yearly average, keys 1-12 are calendar months
MONTHS = dict(enumerate([
    'Yearly Average',
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]))

In [117]:
# Ridership range across the stations that are actually drawn
remaining = df.loc[df['station'].isin(STATION_ORDER)]
shown_ridership = remaining['ridership']
print(f"Min: {shown_ridership.min()}, Max: {shown_ridership.max()}")

Min: 100, Max: 4000


In [118]:
def get_ridership_data(df, month=0):
    """
    Select the ridership rows backing one view of the map.

    month=0 returns the mean ridership per (year, station) over every row
    in ``df``; any other value returns a copy of the rows whose ``month``
    equals it. Either way the result has the columns
    ``year``, ``station`` and ``ridership``.
    """
    if month != 0:
        # Specific month: plain row filter, no aggregation
        in_month = df['month'] == month
        return df.loc[in_month, ['year', 'station', 'ridership']].copy()

    # Yearly average
    per_station_year = df.groupby(['year', 'station'])['ridership'].mean()
    return per_station_year.reset_index()

def scale_size(ridership, min_size=5, max_size=70, r_min=None, r_max=None):
    """
    Map a ridership value linearly onto a marker size.

    Parameters
    ----------
    ridership : number or array-like
        Value(s) to scale.
    min_size, max_size : number
        Marker sizes assigned to ``r_min`` and ``r_max`` respectively.
    r_min, r_max : number, optional
        Ridership bounds of the scale. Whichever is omitted is taken from
        the module-level ``df``, restricted to the stations listed in
        ``STATION_ORDER``.

    Returns
    -------
    Marker size(s) between ``min_size`` and ``max_size``; inputs outside
    ``[r_min, r_max]`` are clipped to the nearest bound. When
    ``r_min == r_max`` there is no range to scale over and ``min_size`` is
    returned instead of dividing by zero.
    """
    if r_min is None or r_max is None:
        shown = df.loc[df['station'].isin(STATION_ORDER), 'ridership']
        if r_min is None:
            r_min = shown.min()
        if r_max is None:
            r_max = shown.max()

    span = r_max - r_min
    if span == 0:
        # Degenerate scale: every value is the same, so avoid 0/0 -> NaN sizes
        normalized = np.zeros_like(np.asarray(ridership, dtype=float))
    else:
        # Linear scaling (more dynamic range), clipped to the scale's ends
        normalized = np.clip((ridership - r_min) / span, 0, 1)

    return min_size + normalized * (max_size - min_size)

In [121]:
def _static_map_traces(visible):
    """Return the ridership-independent traces of one view: three line segments, station dots, station labels."""
    line_segments = [
        # Trunk segment shown on the map
        ['Science Park/West End', 'Lechmere'],
        # Medford branch (Lechmere to Medford/Tufts)
        ['Lechmere', 'East Somerville', 'Gilman Square',
         'Magoun Square', 'Ball Square', 'Medford/Tufts'],
        # Union Square branch
        ['Lechmere', 'Union Square'],
    ]
    traces = [
        go.Scatter(
            x=[STATION_POSITIONS[s][0] for s in segment],
            y=[STATION_POSITIONS[s][1] for s in segment],
            mode='lines',
            line=dict(color='#00843D', width=8),
            hoverinfo='skip',
            showlegend=False,
            visible=visible
        )
        for segment in line_segments
    ]

    station_x = [STATION_POSITIONS[s][0] for s in STATION_ORDER]
    station_y = [STATION_POSITIONS[s][1] for s in STATION_ORDER]

    # Station dots (white circles on the line)
    traces.append(go.Scatter(
        x=station_x, y=station_y,
        mode='markers',
        marker=dict(
            size=12,
            color='#ffffff',
            line=dict(color='#00843D', width=3)
        ),
        hoverinfo='skip',
        showlegend=False,
        visible=visible
    ))

    # Station labels: above the line, except Union Square whose label goes below
    label_y = [
        y + (0.3 if s != 'Union Square' else -0.3)
        for s, y in zip(STATION_ORDER, station_y)
    ]
    traces.append(go.Scatter(
        x=station_x,
        y=label_y,
        mode='text',
        text=[s.replace('/', '/<br>') for s in STATION_ORDER],
        textfont=dict(size=9, color='#1a1a2e'),
        hoverinfo='skip',
        showlegend=False,
        visible=visible
    ))
    return traces


def _ridership_circle_trace(data, year, stack_offset, period_name, visible):
    """Return the marker trace holding one year's ridership circles for one view."""
    year_data = data[data['year'] == year]

    circle_x = []
    circle_y = []
    sizes = []
    hover_texts = []

    for station in STATION_ORDER:
        station_data = year_data[year_data['station'] == station]
        if station_data.empty:
            # No row for this station/year in this view: draw no circle
            continue

        ridership = station_data['ridership'].values[0]
        base_x, base_y = STATION_POSITIONS[station]

        # Circles are stacked vertically, away from the line: upwards for the
        # main line, downwards (mirrored) for Union Square.
        below_line = station == 'Union Square'
        y_offset = -stack_offset if below_line else stack_offset
        clearance = -1.3 if below_line else 1.3

        circle_x.append(base_x)
        circle_y.append(base_y + y_offset * 1.2 + clearance)
        sizes.append(scale_size(ridership))
        hover_texts.append(
            f"<b>{station}</b><br>"
            f"Year: {year}<br>"
            f"Period: {period_name}<br>"
            f"Avg Daily Riders: {ridership:,.0f}"
        )

    return go.Scatter(
        x=circle_x,
        y=circle_y,
        mode='markers',
        marker=dict(
            size=sizes,
            color=YEAR_COLORS[year],
            opacity=0.85,
            line=dict(color='black', width=1)
        ),
        text=hover_texts,
        hovertemplate='%{text}<extra></extra>',
        name=str(year),
        showlegend=True,
        visible=visible,
        legendgroup=str(year)
    )


def create_metro_map(df):
    """
    Create the interactive metro map with dropdown for month selection.

    One "view" is built per entry of ``MONTHS`` (key 0 = yearly average,
    other keys = that month). Each view consists of the line segments, the
    station dots, the station labels and one circle trace per year in
    ``YEARS``. All views are added to the same figure; only the view for
    key 0 starts visible, and each dropdown button makes exactly one view
    visible.

    Parameters
    ----------
    df : DataFrame with the columns ``year``, ``month``, ``station`` and
        ``ridership``.

    Returns
    -------
    The assembled ``go.Figure``.
    """
    fig = go.Figure()

    # Vertical offset for stacking circles: years are spread 0.5 apart and
    # centred on 0, first year on top (0.5 / 0 / -0.5 for three years).
    centre = (len(YEARS) - 1) / 2
    year_offsets = {year: (centre - i) * 0.5 for i, year in enumerate(YEARS)}

    # Build one list of traces per dropdown option
    views = []
    for month_num, period_name in MONTHS.items():
        data = get_ridership_data(df, month_num)
        visible = (month_num == 0)

        view = _static_map_traces(visible)
        for year in YEARS:
            view.append(_ridership_circle_trace(
                data, year, year_offsets[year], period_name, visible
            ))
        views.append((month_num, view))

    # Add all traces to figure, view by view
    for _, view in views:
        for trace in view:
            fig.add_trace(trace)

    # Create dropdown buttons
    buttons = []
    for position, (month_num, _) in enumerate(views):
        # Visibility mask over every trace in the figure: True only for the
        # traces belonging to this button's view
        visibility = [
            idx == position
            for idx, (_, view) in enumerate(views)
            for _trace in view
        ]
        buttons.append(dict(
            label=MONTHS[month_num],
            method='update',
            args=[{'visible': visibility}]
        ))

    # Update layout
    fig.update_layout(
        title=dict(
            text='<b>GREEN LINE EXTENSION</b><br><span style="font-size:14px;color:#888">Average Daily Ridership by Station</span>',
            x=0.5,
            font=dict(size=24, color='#00843D')
        ),
        updatemenus=[
            dict(
                active=0,
                buttons=buttons,
                direction='down',
                showactive=True,
                x=0.02,
                xanchor='left',
                y=0.98,
                yanchor='top',
                bgcolor='#ffffff',
                bordercolor='#00843D',
                font=dict(color='black')
            )
        ],
        annotations=[
            dict(
                text='<b>Time Period:</b>',
                x=0.02,
                xref='paper',
                y=1.06,
                yref='paper',
                showarrow=False,
                font=dict(size=12, color='#AAA')
            )
        ],
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        font=dict(color='white'),
        showlegend=True,
        legend=dict(
            title=dict(text='<b>Year</b>', font=dict(color='#333333')),
            bgcolor='#ffffff',
            bordercolor='#00843D',
            borderwidth=1,
            font=dict(color='#333333'),
            x=0.85,
            y=0.98,
            xanchor='right',
            yanchor='top'
        ),
        xaxis=dict(
            showgrid=False,
            showticklabels=False,
            zeroline=False,
            range=[0, 12]
        ),
        yaxis=dict(
            showgrid=False,
            showticklabels=False,
            zeroline=False,
            # Lock the aspect ratio so the schematic is not stretched
            scaleanchor='x',
            scaleratio=1,
            range=[-4, 3.5]
        ),
        width=1100,
        height=550,
        margin=dict(l=20, r=20, t=100, b=40),
        hoverlabel=dict(
            bgcolor='rgba(0,0,0,0.9)',
            bordercolor='#00843D',
            font=dict(color='white', size=12)
        )
    )

    return fig

In [122]:
# Create and display the map.
# `fig` stays at module level because the HTML export cell further down
# writes this same figure to disk.
fig = create_metro_map(df)
fig.show()

## Total monthly ridership

In [None]:
# For every (year, month), add up the per-station average daily ridership
# over the stations drawn on the map above.
glx_stations = STATION_ORDER  # only the stations we're showing from above
on_map = df['station'].isin(glx_stations)
monthly_totals = (
    df.loc[on_map]
    .groupby(['year', 'month'])['ridership']
    .sum()
    .reset_index()
)

# One line per year, coloured like the circles on the map
fig_line = go.Figure()

for year in YEARS:
    year_data = monthly_totals.loc[monthly_totals['year'] == year]
    year_trace = go.Scatter(
        x=year_data['month'],
        y=year_data['ridership'],
        mode='lines+markers',
        name=str(year),
        line=dict(color=YEAR_COLORS[year], width=3),
        marker=dict(size=8)
    )
    fig_line.add_trace(year_trace)

# Layout pieces, named so the final call reads top-down
line_title = dict(
    text='<b>GLX Total Monthly Ridership</b>',
    x=0.5,
    font=dict(size=20, color='#00843D')
)
line_xaxis = dict(
    title='Month',
    # Explicit ticks so month numbers 1-12 show as abbreviated names
    tickmode='array',
    tickvals=list(range(1, 13)),
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    gridcolor='#e0e0e0'
)
line_yaxis = dict(
    title='Total Avg Daily Ridership',
    gridcolor='#e0e0e0'
)
line_legend = dict(
    title=dict(text='<b>Year</b>'),
    bgcolor='#ffffff',
    bordercolor='#00843D',
    borderwidth=1
)

fig_line.update_layout(
    title=line_title,
    xaxis=line_xaxis,
    yaxis=line_yaxis,
    plot_bgcolor='#ffffff',
    paper_bgcolor='#ffffff',
    font=dict(color='#333333'),
    legend=line_legend,
    width=900,
    height=400,
    hovermode='x unified'
)

fig_line.show()

## Ridership Summary Statistics

In [None]:
# Summary table: yearly averages by station (rows in map order), plus the
# percentage change from 2023 to 2025.
change_col = 'Change 2023→2025'  # defined once so both uses below stay in sync

summary = df.groupby(['station', 'year'])['ridership'].mean().unstack()
summary = summary.reindex(STATION_ORDER)
# The change is computed from the unrounded means and stored as text ("12.3%")
summary[change_col] = ((summary[2025] - summary[2023]) / summary[2023] * 100).round(1).astype(str) + '%'
# Only now round the yearly means to whole riders for display
summary = summary.round(0).astype({2023: int, 2024: int, 2025: int})
summary.columns = ['2023 Avg', '2024 Avg', '2025 Avg', change_col]
print("Average Daily Ridership by Station and Year:")
print("=" * 70)
summary

In [None]:
# Export to standalone HTML (optional).
# Writes the map figure (`fig`, built in the map cell above) next to the notebook.
# NOTE(review): include_plotlyjs='cdn' should make the page load plotly.js from
# a CDN instead of embedding it, so the file presumably needs network access
# to render — confirm against the plotly docs if offline use matters.
fig.write_html('glx_ridership_map.html', include_plotlyjs='cdn')
print("Saved interactive map to: glx_ridership_map.html")

---

**Notes:**
- Circle size represents the upper-bound average daily ridership (the `highbound_avg_daily_ons` column of the source data)
- Use the dropdown menu (top-left) to switch between yearly average and individual months
- Hover over circles to see detailed ridership data
- Data source: MBTA GLX Ridership Data (2023-2025)