# Check exported data

In [None]:
import geopandas as gpd
import intake
import pandas as pd

# Open the intake catalog from a path relative to the current working
# directory; the cells below read individual entries from it.
catalog = intake.open_catalog("./catalog.yml")

In [None]:
def print_stats(gdf):
    """Print a quick summary of a dataset: CRS, columns, dtypes, row count.

    Args:
        gdf: Object exposing `crs`, `columns`, `dtypes` and `len()`
            (the datasets read from the catalog in this notebook).

    The CRS is reported as its EPSG code. When no CRS is set
    (`gdf.crs is None`) the summary prints `CRS: None` instead of raising
    AttributeError, so a missing CRS is visible in the check output and
    the remaining statistics are still printed.
    """
    crs = gdf.crs
    # Guard the None case: calling .to_epsg() on a missing CRS would abort
    # the whole summary.
    epsg = crs.to_epsg() if crs is not None else None
    print(f"CRS: {epsg}")
    print(f"{gdf.columns}")
    print(gdf.dtypes)
    print(f"# rows: {len(gdf)}")

## Routes

In [None]:
# Read the `ca_transit_routes` catalog entry; `gdf` is rebound for each
# dataset checked in the sections below.
gdf = catalog.ca_transit_routes.read()

In [None]:
# Print CRS, columns, dtypes and row count for the routes export.
print_stats(gdf)

In [None]:
# Count distinct (agency, route_id, shape_id) combinations. Comparing this
# with the "# rows" value from print_stats shows whether any key repeats.
cols = ['agency', 'route_id', 'shape_id']
print(f"# unique combos: {len(gdf[cols].drop_duplicates())}")

In [None]:
def make_map(gdf: gpd.GeoDataFrame, plot_col: str):
    """Show an interactive map of `gdf`, colored categorically by `plot_col`.

    The map is built with `gdf.explore` on the "CartoDB Positron" basemap
    with the legend turned off, then rendered through the notebook's
    `display`. Nothing is returned.
    """
    explore_options = {
        "categorical": True,
        "tiles": "CartoDB Positron",
        "legend": False,
    }
    display(gdf.explore(plot_col, **explore_options))

In [None]:
# Static plot of the routes, colored by `route_id`.
gdf.plot("route_id")
# Interactive alternative, left disabled:
#make_map(gdf, "route_id")

## Stops

In [None]:
# Read the `ca_transit_stops` catalog entry, replacing the previous `gdf`.
gdf = catalog.ca_transit_stops.read()   

In [None]:
# Print CRS, columns, dtypes and row count for the stops export.
print_stats(gdf)

In [None]:
# Count distinct (agency, route_id, stop_id) combinations. Comparing this
# with the "# rows" value from print_stats shows whether any key repeats.
cols = ['agency', 'route_id', 'stop_id']
print(f"# unique combos: {len(gdf[cols].drop_duplicates())}")

In [None]:
# Static plot of the stops, colored by `route_id`.
gdf.plot("route_id")
# Interactive alternative, left disabled:
#make_map(gdf, "route_id")

## HQTA Areas

In [None]:
# Read the `ca_hq_transit_areas` catalog entry, replacing the previous `gdf`.
gdf = catalog.ca_hq_transit_areas.read()

In [None]:
# Print CRS, columns, dtypes and row count for the HQTA areas export.
print_stats(gdf)

In [None]:
# Keep only rows whose hqta_type is "major_stop_brt" and count how many
# rows each route_id contributes.
gdf[gdf.hqta_type=="major_stop_brt"].route_id.value_counts()

In [None]:
# Static plot of the HQTA areas, colored by `org_id_primary`.
gdf.plot("org_id_primary")

## HQTA Stops

In [None]:
# Read the `ca_hq_transit_stops` catalog entry, replacing the previous `gdf`.
gdf = catalog.ca_hq_transit_stops.read()

In [None]:
# Print CRS, columns, dtypes and row count for the HQTA stops export.
print_stats(gdf)

In [None]:
# Static plot of the HQTA stops, colored by `org_id_primary`.
gdf.plot("org_id_primary")

## Speeds by Stop Segments

In [None]:
# Read the `speeds_by_stop_segments` catalog entry, replacing the previous `gdf`.
gdf = catalog.speeds_by_stop_segments.read()

In [None]:
# Print CRS, columns, dtypes and row count for the stop-segment speeds export.
print_stats(gdf)

In [None]:
# Histogram of `p50_mph` in bins of width 5. `range(0, 80, 5)` yields bin
# edges 0, 5, ..., 75, so values above 75 are not counted in any bin.
gdf.p50_mph.hist(bins=range(0, 80, 5))

In [None]:
# Print summary statistics (`describe()`) for each speed column in turn.
for col in ["p20_mph", "p50_mph", "p80_mph"]:
    print(gdf[col].describe())

In [None]:
# Static plot of the stop segments, colored by `p50_mph`, with a legend.
gdf.plot("p50_mph", legend=True)

## Speeds by Route and Time-of-Day

In [None]:
# Read the `speeds_by_route_time_of_day` catalog entry, replacing the previous `gdf`.
gdf = catalog.speeds_by_route_time_of_day.read()

In [None]:
# Print CRS, columns, dtypes and row count for the route/time-of-day speeds export.
print_stats(gdf)

In [None]:
# Static plot of the routes, colored by `speed_mph`, with a legend.
gdf.plot("speed_mph", legend=True)