# Check HQTA points / polygons

In [None]:
import geopandas as gpd
import pandas as pd

from IPython.display import Markdown

from update_vars import GCS_FILE_PATH

In [None]:
def make_map(gdf, plot_col):
    """
    Render an interactive map of `gdf`, colored by `plot_col`.

    Datetime columns are removed before calling `explore()`
    (presumably because timestamps do not serialize cleanly into
    the map -- TODO confirm).

    Args:
        gdf: GeoDataFrame to plot.
        plot_col: name of the column used to color the features;
            treated as categorical.

    Returns:
        None. The map is shown with `display()` as a side effect,
        so callers do not need to display anything themselves.
    """
    # Catch every datetime flavor (tz-aware, non-nanosecond units),
    # not only the exact 'datetime64[ns]' dtype.
    date_cols = [
        c for c in gdf.columns
        if pd.api.types.is_datetime64_any_dtype(gdf[c])
    ]

    # columns= is required here: a positional drop() looks the labels up
    # in the row index and raises KeyError instead of dropping columns.
    m = gdf.drop(columns=date_cols).explore(
        plot_col,
        categorical=True,
        tiles="CartoDB Positron",
        legend=True,
    )

    display(m)

## Rail / BRT / Ferry stops

In [None]:
# Rail / BRT / ferry stops, read from the shared GCS folder
stops = gpd.read_parquet(f"{GCS_FILE_PATH}rail_brt_ferry.parquet")

# Distinct HQTA types present in the stops file; one map is drawn per type
hqta_types = list(stops["hqta_type"].unique())

In [None]:
# For each HQTA type: a markdown heading, then a map of its stops
# colored by route_id.
for hqta_type in hqta_types:
    display(Markdown(f"### HQTA Type: {hqta_type}"))

    stops_of_type = stops.loc[stops.hqta_type == hqta_type]
    make_map(stops_of_type, "route_id")

## Check HQTA points

In [None]:
def check_for_missing(gdf):
    """
    Print and display a missing-value report for the HQTA columns.

    - hqta_type, hqta_details: print the value counts, then display
      the rows where the column is null.
    - base64_url_primary: display the rows where the column is null.
    - base64_url_secondary: display the value counts of
      agency_secondary among the rows where the column is null.

    Returns None; everything is emitted via print() / display().
    """
    for col in ("hqta_type", "hqta_details"):
        print(f"column: {col}")
        print(gdf[col].value_counts())
        rows_missing = gdf[gdf[col].isna()]
        display(rows_missing)

    primary_url = "base64_url_primary"
    print(f"column: {primary_url}")
    display(gdf[gdf[primary_url].isna()])

    secondary_url = "base64_url_secondary"
    print(f"column: {secondary_url}")
    # Summarize by secondary agency instead of listing every row
    without_secondary = gdf[gdf[secondary_url].isna()]
    display(without_secondary.agency_secondary.value_counts())

In [None]:
# HQTA points export: load it, then report missing values
gdf = gpd.read_parquet(
    f"{GCS_FILE_PATH}hqta_points.parquet"
)
check_for_missing(gdf)

In [None]:
hq_types = gdf["hqta_type"].unique().tolist()

# Print the hqta_details breakdown within each hqta_type
for hq_type in hq_types:
    details = gdf.loc[gdf.hqta_type == hq_type, "hqta_details"]
    print(hq_type)
    print(details.value_counts())

In [None]:
# One map per HQTA type, colored by hqta_details.
# make_map() displays the map itself and has no return value, so it is
# called directly; displaying its result would only render `None`.
for hq_type in hq_types:
    subset = gdf[gdf.hqta_type == hq_type]

    make_map(subset, "hqta_details")

## Check HQTA polygons

In [None]:
# HQTA polygons export: rebinds `gdf`, then runs the same missing-value report
gdf = gpd.read_parquet(
    f"{GCS_FILE_PATH}hqta_areas.parquet"
)
check_for_missing(gdf)

In [None]:
hq_types = gdf.hqta_type.unique().tolist()

# One map per HQTA type, colored by primary agency.
# make_map() displays the map itself and has no return value, so it is
# called directly; displaying its result would only render `None`.
for hq_type in hq_types:
    subset = gdf[gdf.hqta_type == hq_type]

    make_map(subset, "agency_primary")

In [None]:
# Which primary URLs are attached to Santa Monica rows.
# na=False: a missing agency_primary counts as "no match", so the mask is
# strictly boolean (pandas refuses to index with a mask containing NaN).
gdf[
    gdf.agency_primary.str.contains("Santa Monica", na=False)
].base64_url_primary.value_counts()

In [None]:
# All rows whose primary agency name contains "Santa Monica".
# na=False: a missing agency_primary counts as "no match", so the mask is
# strictly boolean (pandas refuses to index with a mask containing NaN).
gdf[gdf.agency_primary.str.contains("Santa Monica", na=False)]

In [None]:
# NOTE(review): `agency_primary` is not assigned anywhere in the visible
# notebook, so this cell would raise NameError as written. Presumably a
# leftover; `gdf.agency_primary` may have been intended -- confirm or remove.
agency_primary