# Check exported data

In [1]:
import geopandas as gpd
import intake
import pandas as pd

# Open the intake catalog. The path is relative, so the notebook must be run
# from the directory that contains catalog.yml.
catalog = intake.open_catalog("./catalog.yml")

In [2]:
def print_stats(gdf: "gpd.GeoDataFrame") -> None:
    """Print a quick structural summary of a GeoDataFrame.

    Prints, in order: the CRS as an EPSG code, the column index, the
    per-column dtypes, and the number of rows.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Frame to summarize. Only ``crs``, ``columns``, ``dtypes`` and
        ``len()`` are used.

    Notes
    -----
    A frame with no CRS set (``gdf.crs is None``) prints ``CRS: None``
    instead of raising ``AttributeError``.
    """
    crs = gdf.crs
    # A missing CRS is exactly what an export check should surface, so
    # report it rather than crash before the other stats are printed.
    epsg = crs.to_epsg() if crs is not None else None
    print(f"CRS: {epsg}")
    print(f"{gdf.columns}")
    print(gdf.dtypes)
    print(f"# rows: {len(gdf)}")

## Routes

In [9]:
# Read the `ca_transit_routes` catalog entry. `gdf` is rebound by every later
# section, so re-run this cell before re-running the routes cells below.
gdf = catalog.ca_transit_routes.read()

In [10]:
# Print the CRS EPSG code, columns, dtypes, and row count of the routes frame.
print_stats(gdf)

CRS: 4326
Index(['agency', 'route_id', 'route_type', 'route_name', 'route_length_feet',
       'shape_id', 'n_trips', 'base64_url', 'shn_route', 'on_shs',
       'shn_districts', 'pct_route_on_hwy_across_districts', 'is_express',
       'is_ferry', 'is_rail', 'is_coverage', 'is_local', 'is_downtown_local',
       'is_rapid', 'geometry'],
      dtype='object')
agency                                 object
route_id                               object
route_type                             object
route_name                             object
route_length_feet                     float64
shape_id                               object
n_trips                                 int64
base64_url                             object
shn_route                              object
on_shs                                  int64
shn_districts                          object
pct_route_on_hwy_across_districts     float64
is_express                              int64
is_ferry                                

In [11]:
# Distinct values of the `agency` column in the routes frame.
gdf["agency"].unique()

array(['Marin County Transit District', 'Monterey-Salinas Transit',
       'Palos Verdes Peninsula Transit Authority', 'POINT',
       'Basin Transit', 'City of Lynwood', 'City of Calabasas',
       'City of Petaluma', 'Solano Transportation Authority',
       'UCSC and City of Santa Cruz Beach Shuttle',
       'Peninsula Corridor Joint Powers Board', 'City of Clovis',
       'Capitol Corridor Joint Powers Authority',
       'City and County of San Francisco',
       'Yuba-Sutter Transit Authority', 'Stanford University',
       'Santa Barbara County Association of Governments',
       'City of Norwalk', 'City of Manteca',
       'San Francisco Bay Area Rapid Transit District',
       'Angel Island-Tiburon Ferry Company', 'Yosemite National Park',
       'City of Alhambra', 'OmniTrans', 'City of Torrance',
       'City of Mountain View', 'Treasure Island Community Development',
       'City of Rio Vista', 'Santa Clara Valley Transportation Authority',
       'City of West Hollywood', '

In [None]:
# Count the distinct (agency, route_id, shape_id) combinations.
cols = ["agency", "route_id", "shape_id"]
print(f"# unique combos: {gdf[cols].drop_duplicates().shape[0]}")

In [None]:
def make_map(gdf: gpd.GeoDataFrame, plot_col: str):
    """Display a categorical `explore` map of `gdf` colored by `plot_col`.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Frame whose `explore` method is called.
    plot_col : str
        Column passed to `explore` as the value to color by.

    The map is shown with `display`; nothing is returned.
    """
    explore_options = {
        "categorical": True,
        "tiles": "CartoDB Positron",
        "legend": False,
    }
    display(gdf.explore(plot_col, **explore_options))

In [None]:
# Static plot colored by route_id.
gdf.plot("route_id")
# Alternative using the tile-backed explore map defined in make_map:
# make_map(gdf, "route_id")

## Stops

In [3]:
# Read the `ca_transit_stops` catalog entry. From here on `gdf` refers to the
# stops frame, not the routes frame.
gdf = catalog.ca_transit_stops.read()   

In [4]:
# Print the CRS EPSG code, columns, dtypes, and row count of the stops frame.
print_stats(gdf)

CRS: 4326
Index(['org_id', 'agency', 'stop_id', 'stop_name', 'n_routes',
       'route_ids_served', 'routetypes', 'n_arrivals', 'n_hours_in_service',
       'meters_to_ca_state_highway', 'base64_url', 'district_name',
       'geometry'],
      dtype='object')
org_id                          object
agency                          object
stop_id                         object
stop_name                       object
n_routes                         int64
route_ids_served                object
routetypes                      object
n_arrivals                       int64
n_hours_in_service               int64
meters_to_ca_state_highway     float64
base64_url                      object
district_name                   object
geometry                      geometry
dtype: object
# rows: 129391


In [8]:
# Distinct values of the `agency` column in the stops frame.
gdf["agency"].unique()

array(['City of Inglewood', 'City of Lodi', 'City of Arcadia',
       'City of Union City', 'City of El Monte',
       'Kings County Area Public Transit Agency',
       'Santa Cruz Metropolitan Transit District',
       'Transit Joint Powers Authority for Merced County',
       'Central Contra Costa Transit Authority', 'Nevada County',
       'City of Maywood', 'Greyhound', 'FlixBus', 'City of Pasadena',
       'City of West Covina', 'University of California, San Diego',
       'Commute.org', 'City of Menlo Park',
       'Golden Empire Transit District', 'City of San Luis Obispo',
       'City of Escalon', 'San Joaquin Regional Transit District',
       'City of Calabasas', 'Sacramento County',
       'Monterey-Salinas Transit', 'Trinity County',
       'Tahoe Transportation District', 'Madera County', 'City of Duarte',
       'Foothill Transit', 'City of Glendale',
       'San Joaquin Regional Rail Commission',
       'City of South San Francisco', 'Riverside Transit Agency',
       

In [6]:
# Count the distinct (agency, route_ids_served, stop_id) combinations.
cols = ["agency", "route_ids_served", "stop_id"]
print(f"# unique combos: {gdf[cols].drop_duplicates().shape[0]}")

# unique combos: 113469


In [None]:
# Static plot colored by stop_id.
gdf.plot("stop_id")
# Alternative using the tile-backed explore map defined in make_map.
# The stops frame has no route_id column, so color by stop_id here:
# make_map(gdf, "stop_id")

## High Quality Transit Areas (HQTA)

In [None]:
# Read the `ca_hq_transit_areas` catalog entry; this rebinds `gdf`.
gdf = catalog.ca_hq_transit_areas.read()

In [None]:
# Print the CRS EPSG code, columns, dtypes, and row count of the HQTA areas frame.
print_stats(gdf)

In [None]:
# Tally route_id values among rows whose hqta_type is "major_stop_brt".
gdf.loc[gdf["hqta_type"] == "major_stop_brt", "route_id"].value_counts()

In [None]:
# Static plot of the HQTA areas colored by org_id_primary.
gdf.plot(column="org_id_primary")

## HQTA Stops

In [None]:
# Read the `ca_hq_transit_stops` catalog entry; this rebinds `gdf`.
gdf = catalog.ca_hq_transit_stops.read()

In [None]:
# Print the CRS EPSG code, columns, dtypes, and row count of the HQTA stops frame.
print_stats(gdf)

In [None]:
# Static plot of the HQTA stops colored by org_id_primary.
gdf.plot(column="org_id_primary")

## Speeds by Stop Segments

In [None]:
# Read the `speeds_by_stop_segments` catalog entry; this rebinds `gdf`.
gdf = catalog.speeds_by_stop_segments.read()

In [None]:
# Print the CRS EPSG code, columns, dtypes, and row count of the stop-segment speeds frame.
print_stats(gdf)

In [None]:
# Histogram of the p50_mph column with bin edges every 5 units.
# NOTE(review): range(0, 80, 5) ends at 75, so values above 75 fall outside
# the last bin and are not drawn. Confirm this cutoff is intended.
gdf["p50_mph"].hist(bins=range(0, 80, 5))

In [None]:
# Print summary statistics for each speed percentile column in turn.
for col in ("p20_mph", "p50_mph", "p80_mph"):
    print(gdf.loc[:, col].describe())

In [None]:
# Static plot of the stop segments colored by p50_mph, with a legend.
gdf.plot(column="p50_mph", legend=True)

## Speeds by Route and Time-of-Day

In [None]:
# Read the `speeds_by_route_time_of_day` catalog entry; this rebinds `gdf`.
gdf = catalog.speeds_by_route_time_of_day.read()

In [None]:
# Print the CRS EPSG code, columns, dtypes, and row count of the route-level speeds frame.
print_stats(gdf)

In [None]:
# Static plot of the routes colored by speed_mph, with a legend.
gdf.plot(column="speed_mph", legend=True)