In [1]:
import intake
import pandas as pd
import geopandas as gpd

from calitp_data_analysis.geography_utils import CA_NAD83Albers_m

# quick GTFS-based stats for section 1e

In [2]:
catalog = intake.open_catalog("../_shared_utils/shared_utils/shared_data_catalog.yml")

In [20]:
mpos = catalog.metropolitan_planning_orgs.read()[['MPO', 'geometry']].to_crs(CA_NAD83Albers_m).rename(columns={"MPO":"mpo"})

In [5]:
ct_dist = catalog.caltrans_districts.read().to_crs(CA_NAD83Albers_m)

In [7]:
stops = catalog.ca_transit_stops.read().to_crs(CA_NAD83Albers_m).clip(ct_dist)

In [8]:
routes = catalog.ca_transit_routes.read().to_crs(CA_NAD83Albers_m).clip(ct_dist)

In [9]:
hqta = catalog.hqta_areas.read().to_crs(CA_NAD83Albers_m).clip(ct_dist)

In [10]:
routes['length'] = routes.geometry.map(lambda x: x.length)

In [101]:
routes_top2 = routes.sort_values(['agency', 'route_id', 'length'],
                  ascending=False).groupby(['agency', 'route_id']).head(2)
routes_top2 = routes_top2.assign(miles = routes_top2.length / rt_utils.METERS_PER_MILE)

In [168]:
def overlay_to_routes(mpo_gdf, routes_gdf = routes_top2):
    
    overlaid = mpo_gdf.overlay(routes_gdf, keep_geom_type=False)
    overlaid.length = overlaid.geometry.map(lambda x: x.length)
    return overlaid

In [169]:
mpo_routes = mpos.groupby('mpo').apply(overlay_to_routes)

In [170]:
from shared_utils import rt_utils

## routes

* keep longest 2 shapes per route -- imperfect but ok for a general estimate

In [171]:
mpo_routes = mpo_routes.reset_index(drop=True)

In [172]:
countable = mpo_routes.drop_duplicates(subset=['mpo', 'agency', 'route_id'])

In [173]:
mpo_route_count = countable.groupby('mpo').count().reset_index()[['mpo', 'route_id']].rename(columns={"route_id":"n_routes"})

In [174]:
mpo_route_mi = mpo_routes.groupby('mpo')[['miles']].sum().reset_index().rename(columns={"miles":"total_route_miles"})

In [175]:
mpo_route_info = mpo_route_count.merge(mpo_route_mi, on='mpo')

In [176]:
routes_top2.route_id.count()

5769

In [177]:
routes_top2.miles.sum()

98560.05838177583

In [178]:
mpo_route_info.round(1).to_csv('routes_by_mpo.csv')

## stops

In [60]:
def overlay_to_stops(mpo_gdf, stops_gdf = stops):
    
    overlaid = mpo_gdf.overlay(stops_gdf, keep_geom_type=False)
    return overlaid

In [65]:
mpo_stops = mpos.groupby('mpo').apply(overlay_to_stops)

In [68]:
mpo_stops = mpo_stops.reset_index(drop=True)

In [93]:
mpo_stop_count = mpo_stops.groupby(['mpo', 'routetypes'])[['stop_id']].count().reset_index().rename(columns={'stop_id':'stop_count'})

In [94]:
def route_types_to_name(route_types):
    rttype_list = [x.strip() for x in route_types.split(',')]
    name_list = [rt_utils.route_type_names[rttype] for rttype in rttype_list]
    return " and ".join(name_list)

In [95]:
mpo_stop_count = mpo_stop_count.assign(
    route_type_names = mpo_stop_count.routetypes.map(lambda x: route_types_to_name(x)))

In [97]:
mpo_stop_count.head(2)

Unnamed: 0,mpo,routetypes,stop_count,route_type_names
0,Association of Monterey Bay Area Governments,"2, 3",3,Rail and Bus
1,Association of Monterey Bay Area Governments,3,1802,Bus


In [158]:
mpo_stop_count.to_csv('stops_by_mpo.csv')

In [112]:
all_stop_count = stops[['routetypes', 'stop_id']].groupby(['routetypes'])[['stop_id']].count().reset_index().rename(columns={'stop_id':'stop_count'})

In [114]:
all_stop_count = all_stop_count.assign(
    route_type_names = all_stop_count.routetypes.map(lambda x: route_types_to_name(x)))

In [159]:
all_stop_count.to_csv('all_stops.csv')

In [115]:
all_stop_count

Unnamed: 0,routetypes,stop_count,route_type_names
0,0,1481,"Tram, Streetcar, Light rail"
1,"0, 3",286,"Tram, Streetcar, Light rail and Bus"
2,1,121,"Subway, Metro"
3,2,340,Rail
4,"2, 3",113,Rail and Bus
5,3,126391,Bus
6,"3, 5",2,Bus and Cable tram
7,4,49,Ferry
8,5,230,Cable tram


## hqta

* MTS areas, dissolve
* HQTC outside MTS areas, dissolve

In [118]:
mts = hqta[hqta.hqta_type.str.contains('major')]

In [123]:
mts_dissolved = mts.dissolve()

In [134]:
SQ_M_TO_SQ_MI = 1/2.59e6

In [135]:
mts_dissolved.geometry.area.iloc[0] * SQ_M_TO_SQ_MI

910.7258667381161

In [128]:
hqtc = hqta[~hqta.hqta_type.str.contains('major')]
hqtc_dissolved = hqtc.dissolve()

In [131]:
hqtc_outside_mts = hqtc_dissolved.overlay(mts_dissolved, how='difference')

In [136]:
hqtc_outside_mts.geometry.area.iloc[0] * SQ_M_TO_SQ_MI

534.2202002565607

In [144]:
import shapely

In [150]:
def overlay_to_mts_hqtc(mpo_gdf):
    mpo_mts = mts_dissolved.overlay(mpo_gdf)
    # print(mpo_mts.geometry)
    if not mpo_mts.geometry.empty:
        mpo_gdf['major_transit_stop_sq_mi'] = mpo_mts.geometry.area.iloc[0] * SQ_M_TO_SQ_MI
    mpo_hqtc = hqtc_outside_mts.overlay(mpo_gdf)
    # print(mpo_hqtc.geometry)
    if not mpo_hqtc.geometry.empty:
        mpo_gdf['additional_hq_corridor_sq_mi'] = mpo_hqtc.geometry.area.iloc[0] * SQ_M_TO_SQ_MI
    
    return mpo_gdf

In [151]:
mpo_mts_hqtc = mpos.groupby('mpo').apply(overlay_to_mts_hqtc)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  mpo_mts_hqtc = mpos.groupby('mpo').apply(overlay_to_mts_hqtc)


In [161]:
mpo_mts_hqtc.round(1).drop(columns=['geometry']).to_csv('mts_hq_corr_by_mpo.csv')