In [1]:
import pandas as pd
import os

import plotly.express as px

In [2]:
# Input locations of the final panel datasets; all four sit in one directory
# and differ only in the file name.
FINAL_DIR = "../../../data/final"

point_panel_in = f"{FINAL_DIR}/point_panel.parquet"
line_panel_in = f"{FINAL_DIR}/line_panel.parquet"
tract_panel_in = f"{FINAL_DIR}/tract_panel.parquet"
comm_panel_in = f"{FINAL_DIR}/comm_panel.parquet"

In [3]:
# Read the four final panels from parquet in a single pass; the order of the
# targets on the left matches the order of the input paths on the right.
point_panel, line_panel, tract_panel, comm_panel = (
    pd.read_parquet(panel_path)
    for panel_path in (point_panel_in, line_panel_in, tract_panel_in, comm_panel_in)
)

In [4]:
# One frame per transit mode. Trains and bikes are taken from point_panel,
# buses from line_panel, and uber from tract_panel.
train_panel = point_panel.loc[point_panel['transit'] == 'train']
bike_panel = point_panel.loc[point_panel['transit'] == 'bike']
bus_panel = line_panel.loc[line_panel['transit'] == 'bus']
uber_panel = tract_panel.loc[tract_panel['transit'] == 'uber']

# Stack the per-mode frames. join='inner' keeps only the columns present in
# every frame being stacked; ignore_index=True renumbers the rows.
cta_modes = [train_panel, bus_panel]
all_modes = cta_modes + [uber_panel, bike_panel]
cta_panel = pd.concat(cta_modes, join='inner', ignore_index=True)
all_panel = pd.concat(all_modes, join='inner', ignore_index=True)

# Total Rides

In [5]:
# Add up rides over all rows of cta_panel for each date, then report the mean
# and standard deviation of those daily totals in thousands.
rides_by_date = cta_panel.groupby('date')['rides']
total = rides_by_date.sum()
mean_k = total.mean() / 1e3
sd_k = total.std() / 1e3
f"Avg daily rides on all CTA {mean_k:.1f}K (SD {sd_k:.1f}K)"

'Avg daily rides on all CTA 790.6K (SD 139.6K)'

In [6]:
# Daily ride totals restricted to cta_panel rows flagged UCMP == 1, summarised
# as mean and standard deviation in thousands.
ucmp_cta = cta_panel.loc[cta_panel['UCMP'] == 1]
total = ucmp_cta.groupby('date')['rides'].sum()
mean_k = total.mean() / 1e3
sd_k = total.std() / 1e3
f"Avg daily rides at UC/MP {mean_k:.0f}K (SD {sd_k:.1f}K)"

'Avg daily rides at UC/MP 147K (SD 27.1K)'

In [7]:
# Daily ride totals over every line_panel row flagged UCMP == 1 (all transit
# values in that panel combined), summarised in thousands.
ucmp_lines = line_panel.loc[line_panel['UCMP'] == 1]
total = ucmp_lines.groupby(['date'])['rides'].sum()
mean_k = total.mean() / 1e3
sd_k = total.std() / 1e3
f"Avg daily rides on LINES serving UC/MP {mean_k:.0f}K (SD {sd_k:.1f}K)"

'Avg daily rides on LINES serving UC/MP 325K (SD 52.4K)'

In [8]:
# Two-step aggregation: first sum rides per (date, transit), then average
# those daily sums within each transit value.
ucmp_lines = line_panel.loc[line_panel['UCMP'] == 1]
daily_by_mode = ucmp_lines.groupby(['date', 'transit'])['rides'].sum()
total = daily_by_mode.groupby('transit').mean()

# Report in whole thousands; `total` itself stays unscaled.
rides_k = (total / 1e3).round().astype(int)
print("Avg daily rides on LINES serving UC/MP (thousands):")
print(rides_k)

Avg daily rides on LINES serving UC/MP (thousands):
transit
bus      136
train    189
Name: rides, dtype: int64


In [9]:
# Rows of cta_panel flagged UCMP == 1; used for both columns of the table.
ucmp_cta = cta_panel.loc[cta_panel['UCMP'] == 1]

# Average of the per-date ride sums for each transit value, in whole thousands.
daily_by_mode = ucmp_cta.groupby(['date', 'transit'])['rides'].sum()
rides_k = (daily_by_mode.groupby('transit').mean() / 1e3).round().astype(int)

# Count of distinct ids per transit value.
# NOTE(review): bus rows originate from line_panel, so for 'bus' this
# presumably counts lines rather than stations - confirm the column label.
nstations = ucmp_cta.groupby('transit')['id'].nunique().rename('nstations')

total = pd.concat([rides_k, nstations], axis=1)
print("Avg daily rides at UC/MP (thousands):")
print(total)

Avg daily rides at UC/MP (thousands):
         rides  nstations
transit                  
bus        136         24
train       11          8


In [10]:
# Rows of tract_panel flagged UCMP == 1; used for both columns of the table.
ucmp_tracts = tract_panel.loc[tract_panel['UCMP'] == 1]

# Average of the per-date ride sums for each transit value, in whole thousands.
daily_by_mode = ucmp_tracts.groupby(['date', 'transit'])['rides'].sum()
rides_k = (daily_by_mode.groupby('transit').mean() / 1e3).round().astype(int)

# Count of distinct ids per transit value among those rows.
ntracts = ucmp_tracts.groupby('transit')['id'].nunique().rename('ntracts')

total = pd.concat([rides_k, ntracts], axis=1)
print("Avg daily rides in UC/MP tracts (thousands):")
print(total)


Avg daily rides in UC/MP tracts (thousands):
         rides  ntracts
transit                
bike         6       37
train       31       14
uber        33       40


# Misc

In [11]:
# Number of distinct ids in tract_panel and in comm_panel, shown as a pair.
n_tract_ids = tract_panel['id'].nunique()
n_comm_ids = comm_panel['id'].nunique()
(n_tract_ids, n_comm_ids)

(1260, 77)

# Time Series

In [12]:
# Keep rows flagged UCMP == 1 whose date falls before the cutoff, then total
# the rides per (date, transit) so each transit value becomes one line.
in_window = (all_panel.UCMP == 1) & (all_panel.date < "2024-08-19")
plot_data = (
    all_panel[in_window]
    .groupby(['date', 'transit'])['rides']
    .sum()
    .reset_index()
)

# Draw one line per transit value, export the figure as JSON, and display it.
fig = px.line(plot_data, x='date', y='rides', color='transit')
fig.write_json("../../../reports/replication/avg-daily-rides.json")
fig.show()