In [None]:
import intake
import pandas as pd
import geopandas as gpd

from calitp_data_analysis.geography_utils import CA_NAD83Albers_m
from shared_utils import rt_utils

import google.auth
credentials, project = google.auth.default()

In [None]:
import sys
sys.path.append('../open_data/')

In [None]:
import open_data_utils

# quick GTFS-based stats for section 1e

In [None]:
# Open the shared intake catalog; every dataset read below comes from it.
catalog = intake.open_catalog(
    "../_shared_utils/shared_utils/shared_data_catalog.yml"
)

In [None]:
# MPO boundaries: keep only the name and geometry, reproject to
# CA_NAD83Albers_m, and rename the name column to lowercase `mpo`.
mpos = (
    catalog.metropolitan_planning_orgs.read()[['MPO', 'geometry']]
    .to_crs(CA_NAD83Albers_m)
    .rename(columns={"MPO": "mpo"})
)

In [None]:
# Caltrans district polygons in the same CRS; used below as the clip mask
# for stops, routes and HQTA areas.
ct_dist = (
    catalog.caltrans_districts.read()
    .to_crs(CA_NAD83Albers_m)
)

In [None]:
# Transit stops, reprojected and clipped to the Caltrans district polygons.
stops = (
    catalog.ca_transit_stops.read()
    .to_crs(CA_NAD83Albers_m)
    .clip(ct_dist)
)

In [None]:
# Route shapes: drop repeats, reproject, and clip to the district polygons.
routes = (
    catalog.ca_transit_routes.read()
    # Agencies that share a feed publish identical routes; keep one copy
    # per (route_id, shape_id, base64_url).
    .drop_duplicates(subset=['route_id', 'shape_id', 'base64_url'])
    .to_crs(CA_NAD83Albers_m)
    .clip(ct_dist)
)

In [None]:
# HQTA polygons, reprojected and clipped to the Caltrans district polygons.
hqta = (
    catalog.hqta_areas.read()
    .to_crs(CA_NAD83Albers_m)
    .clip(ct_dist)
)

In [None]:
# Length of each clipped shape in CRS units. The vectorised GeoSeries
# accessor replaces a per-row Python lambda and yields NaN, rather than
# raising, for a missing geometry.
routes['length'] = routes.geometry.length

In [None]:
# Sort shapes longest-first within each (agency, route_id) and keep the
# first two of each group.
routes_top2 = (
    routes
    .sort_values(by=['agency', 'route_id', 'length'], ascending=False)
    .groupby(['agency', 'route_id'])
    .head(2)
)
# NOTE: `routes_top2.length` resolves to the GeoDataFrame geometry-length
# property, not to the 'length' column created above.
routes_top2 = routes_top2.assign(
    miles=routes_top2.length / rt_utils.METERS_PER_MILE
)

In [None]:
def overlay_to_routes(mpo_gdf, routes_gdf = routes_top2):
    """Intersect MPO boundary rows with route shapes and re-measure the pieces.

    Parameters
    ----------
    mpo_gdf : GeoDataFrame
        Boundary rows to intersect with; called once per MPO via
        ``mpos.groupby('mpo').apply``.
    routes_gdf : GeoDataFrame, optional
        Route shapes to cut. Defaults to ``routes_top2`` as it existed when
        this cell was run (the default is bound at definition time).

    Returns
    -------
    GeoDataFrame
        Overlay result with ``length`` (CRS units) and ``miles`` both
        measured on the clipped geometry.
    """
    # keep_geom_type=False keeps results whose geometry type differs from
    # that of mpo_gdf.
    overlaid = mpo_gdf.overlay(routes_gdf, keep_geom_type=False)
    clipped_length = overlaid.geometry.length
    # Item assignment instead of `overlaid.length = ...`: the attribute name
    # collides with the GeoDataFrame `length` property.
    overlaid['length'] = clipped_length
    # The `miles` value carried over from routes_gdf describes the whole
    # shape; recompute it so per-MPO sums count only the part inside the MPO.
    overlaid['miles'] = clipped_length / rt_utils.METERS_PER_MILE
    return overlaid

In [None]:
# Run the route overlay once per MPO and stack the results.
mpo_routes = mpos.groupby(by='mpo').apply(overlay_to_routes)

## routes

* keep longest 2 shapes per route -- imperfect but ok for a general estimate

In [None]:
# Discard the index produced by groupby.apply; `mpo` is already a column.
mpo_routes = mpo_routes.reset_index(drop=True)

In [None]:
# Check which columns the overlay output carries before selecting from it.
mpo_routes.columns

In [None]:
# One row per distinct (mpo, agency, route_id); the measurement and geometry
# columns are not needed for counting.
countable = (
    mpo_routes
    .drop_duplicates(subset=['mpo', 'agency', 'route_id'])
    .drop(columns=['length', 'miles', 'geometry'])
)

In [None]:
# Preview of non-null route_id counts per MPO (the next cell stores the same
# result with the column renamed).
countable.groupby('mpo').count().reset_index()[['mpo', 'route_id']]

In [None]:
# Distinct routes per MPO, counted as non-null route_id values.
mpo_route_count = (
    countable
    .groupby('mpo')
    .count()
    .reset_index()
    [['mpo', 'route_id']]
    .rename(columns={"route_id": "n_routes"})
)

In [None]:
# Sum the `miles` column of every overlay row within each MPO.
mpo_route_mi = (
    mpo_routes
    .groupby('mpo')[['miles']]
    .sum()
    .reset_index()
    .rename(columns={"miles": "total_route_miles"})
)

In [None]:
# Combine route counts and route miles into one table keyed on `mpo`.
mpo_route_info = pd.merge(mpo_route_count, mpo_route_mi, on='mpo')

In [None]:
# Number of non-null route_id rows in routes_top2, before splitting by MPO.
routes_top2.route_id.count()

In [None]:
# Total miles across all of routes_top2, before splitting by MPO.
routes_top2.miles.sum()

In [None]:
# Display the per-MPO route summary rounded to one decimal place.
mpo_route_info.round(1)

In [None]:
# Write the rounded per-MPO route summary to the working directory.
(
    mpo_route_info
    .round(decimals=1)
    .to_csv('routes_by_mpo.csv')
)

## stops

In [None]:
def overlay_to_stops(mpo_gdf, stops_gdf = stops):
    """Overlay MPO boundary rows with the stop layer.

    Parameters
    ----------
    mpo_gdf : GeoDataFrame
        Boundary rows to overlay; called once per MPO via
        ``mpos.groupby('mpo').apply``.
    stops_gdf : GeoDataFrame, optional
        Stops to overlay. Defaults to ``stops`` as it existed when this cell
        was run.

    Returns
    -------
    GeoDataFrame
        The overlay of ``mpo_gdf`` with ``stops_gdf``. ``keep_geom_type=False``
        keeps results whose geometry type differs from that of ``mpo_gdf``.
    """
    return mpo_gdf.overlay(stops_gdf, keep_geom_type=False)

In [None]:
# Run the stop overlay once per MPO and stack the results.
mpo_stops = mpos.groupby(by='mpo').apply(overlay_to_stops)

In [None]:
# Discard the index produced by groupby.apply; `mpo` is already a column.
mpo_stops = mpo_stops.reset_index(drop=True)

In [None]:
# Count stops per MPO for each distinct `routetypes` value.
mpo_stop_count = (
    mpo_stops
    .groupby(['mpo', 'routetypes'])[['stop_id']]
    .count()
    .reset_index()
    .rename(columns={'stop_id': 'stop_count'})
)

In [None]:
def route_types_to_name(route_types):
    """Turn a comma-separated string of route type codes into readable names.

    Each code is stripped of surrounding whitespace and looked up in
    ``rt_utils.route_type_names``; the names are joined with " and ".
    An unknown code raises ``KeyError``.
    """
    names = []
    for code in route_types.split(','):
        names.append(rt_utils.route_type_names[code.strip()])
    return " and ".join(names)

In [None]:
# Add a readable label for each `routetypes` code string.
mpo_stop_count = mpo_stop_count.assign(
    route_type_names=mpo_stop_count.routetypes.map(route_types_to_name)
)

In [None]:
# Peek at the first two rows to check the new route_type_names column.
mpo_stop_count.head(2)

In [None]:
# Write the per-MPO stop counts to the working directory.
mpo_stop_count.to_csv('stops_by_mpo.csv')

In [None]:
# Same stop count as above, over the whole `stops` layer instead of per MPO.
all_stop_count = (
    stops[['routetypes', 'stop_id']]
    .groupby(['routetypes'])[['stop_id']]
    .count()
    .reset_index()
    .rename(columns={'stop_id': 'stop_count'})
)

In [None]:
# Add a readable label for each `routetypes` code string.
all_stop_count = all_stop_count.assign(
    route_type_names=all_stop_count.routetypes.map(route_types_to_name)
)

In [None]:
# Write the overall stop counts to the working directory.
all_stop_count.to_csv('all_stops.csv')

In [None]:
# Display the overall stop counts by route type combination.
all_stop_count

## hqta

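* Major transit stop (MTS) areas: dissolve into a single geometry
* High quality transit corridors (HQTC) outside MTS areas: dissolve, then subtract the MTS geometry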

In [None]:
# Rows whose hqta_type mentions 'major' are the major transit stop areas.
mts = hqta.loc[hqta['hqta_type'].str.contains('major')]

In [None]:
# Merge all major-stop polygons into one row so overlapping areas are not
# double counted when measuring area.
mts_dissolved = mts.dissolve()

In [None]:
# Multiply an area in square metres by this factor to get square miles
# (uses the rounded figure of 2.59 million square metres per square mile).
SQ_M_TO_SQ_MI = 1.0 / 2_590_000.0

In [None]:
# Area of the dissolved major-stop geometry, scaled by SQ_M_TO_SQ_MI.
mts_dissolved.geometry.area.iloc[0] * SQ_M_TO_SQ_MI

In [None]:
# Every HQTA row that is not a 'major' type, merged into a single row.
hqtc = hqta.loc[~hqta['hqta_type'].str.contains('major')]
hqtc_dissolved = hqtc.dissolve()

In [None]:
# Keep only the part of the corridor geometry not already covered by the
# major-stop geometry, so the two areas can be reported without overlap.
hqtc_outside_mts = hqtc_dissolved.overlay(mts_dissolved, how='difference')

In [None]:
# Area of the corridor geometry outside major-stop areas, scaled by
# SQ_M_TO_SQ_MI.
hqtc_outside_mts.geometry.area.iloc[0] * SQ_M_TO_SQ_MI

In [None]:
import shapely

In [None]:
def overlay_to_mts_hqtc(mpo_gdf):
    """Attach major-stop and corridor areas to MPO boundary rows.

    Overlays ``mpo_gdf`` with the notebook-level ``mts_dissolved`` and
    ``hqtc_outside_mts`` layers and records the area of each result, scaled
    by ``SQ_M_TO_SQ_MI``.

    Parameters
    ----------
    mpo_gdf : GeoDataFrame
        Boundary rows to measure; called once per MPO via
        ``mpos.groupby('mpo').apply``.

    Returns
    -------
    GeoDataFrame
        A copy of ``mpo_gdf`` with ``major_transit_stop_sq_mi`` and
        ``additional_hq_corridor_sq_mi`` columns. Both columns are always
        present; where the overlay is empty the value is 0.0.
    """
    # Work on a copy: the group handed over by groupby.apply should not be
    # mutated inside the applied function.
    result = mpo_gdf.copy()

    # Both overlays use the untouched input, so the second one does not carry
    # the column added for the first.
    mpo_mts = mts_dissolved.overlay(mpo_gdf)
    mpo_hqtc = hqtc_outside_mts.overlay(mpo_gdf)

    # Sum instead of taking row 0: an empty overlay gives 0.0 and an overlay
    # with several rows is counted in full.
    result['major_transit_stop_sq_mi'] = mpo_mts.geometry.area.sum() * SQ_M_TO_SQ_MI
    result['additional_hq_corridor_sq_mi'] = mpo_hqtc.geometry.area.sum() * SQ_M_TO_SQ_MI

    return result

In [None]:
# Measure major-stop and corridor area once per MPO and stack the results.
mpo_mts_hqtc = mpos.groupby(by='mpo').apply(overlay_to_mts_hqtc)

In [None]:
# Round, drop the geometry column, and write the per-MPO areas to the
# working directory.
(
    mpo_mts_hqtc
    .round(decimals=1)
    .drop(columns=['geometry'])
    .to_csv('mts_hq_corr_by_mpo.csv')
)

## MTC questions

In [None]:
# Route pieces that fall inside the MTC boundary.
mtc = mpo_routes[mpo_routes['mpo'] == "Metropolitan Transportation Commission"]

In [None]:
# Numeric totals per agency within MTC, largest mileage first.
# numeric_only=True restricts the sum to numeric columns; the geometry and
# text columns cannot be meaningfully added.
mtc.groupby('agency').sum(numeric_only=True).sort_values('miles', ascending=False)

In [None]:
# Interactive map of the MTC route pieces for a visual check.
mtc.explore()

In [None]:
# De-duplicated (mpo, agency, route_id) rows for MTC only.
countable_mtc = countable[countable['mpo'] == "Metropolitan Transportation Commission"]

In [None]:
# Display the MTC rows that feed the route count.
countable_mtc

### Santa Barbara?

In [None]:
# Route pieces for any MPO whose name contains "Santa Barbara".
sb = mpo_routes[mpo_routes['mpo'].str.contains("Santa Barbara")]

In [None]:
# Interactive map of the Santa Barbara route pieces for a visual check.
sb.explore()

In [None]:
# Map only the rows whose agency name contains "Ventura".
sb.query('agency.str.contains("Ventura")').explore()

In [None]:
# De-duplicated (mpo, agency, route_id) rows for the Santa Barbara MPO.
sb_ct = countable[countable['mpo'].str.contains("Santa Barbara")]

In [None]:
# Non-null value counts per agency within the Santa Barbara rows.
sb_ct.groupby('agency').count() #  exclude VCTC duplicates, otherwise OK

In [None]:
# Show the Santa Barbara rows whose agency name contains "Metro".
sb_ct.query('agency.str.contains("Metro")')