# Check HQTA points / polygons

## Dropping bad stops

* Be more stringent about which `stop_id` values to drop, since the same `stop_id` can be shared across operators. Also specify which operator each dropped `stop_id` belongs to.

### Done in `D2_assemble_hqta_polygons`, but should also be added to `D1_assemble_hqta_points`


In [None]:
import geopandas as gpd
import pandas as pd

from IPython.display import Markdown

from utilities import GCS_FILE_PATH

In [None]:
# Load the HQTA points dataset from the location given by GCS_FILE_PATH so
# that the flagged stop_ids below can be looked up in it.
gdf = gpd.read_parquet(f"{GCS_FILE_PATH}hqta_points.parquet")

In [None]:
## stop_id values flagged as bad; each is looked up in `gdf.stop_id` below.
## drop incorrect HMB data, TODO investigate
## drop incorrect Cheviot data, TODO investigate refactor (run shapes in frequency order...)
## NOTE(review): the IDs are strings and carry no operator, so a match on
## stop_id alone may also pick up another operator's stop with the same ID.
bad_stops = ['315604', '315614', '689']

In [None]:
# Basemap tile layer shared by every interactive map in this notebook.
TILES = "CartoDB Positron"


def make_map(gdf, plot_col):
    """Display an interactive categorical map of `gdf` in the notebook.

    Parameters
    ----------
    gdf : GeoDataFrame
        Rows to plot. The caller's frame is not modified.
    plot_col : str
        Column whose values are used to colour the features; it is treated
        as categorical.

    Returns
    -------
    None
        The map is rendered with `display()`; nothing is returned, so
        callers should not capture or re-display the result.
    """
    # `service_date` is removed before plotting when present -- presumably
    # because date values do not serialise for the map (TODO confirm).
    # errors="ignore" makes this a no-op when the column is absent.
    plottable = gdf.drop(columns="service_date", errors="ignore")

    display(plottable.explore(plot_col, categorical=True, tiles=TILES))

In [None]:
# For every flagged stop_id: show its rows, a heading and a map, or report
# that the stop_id does not occur in the dataset at all.
for bad_stop in bad_stops:
    subset = gdf[gdf.stop_id == bad_stop]

    # Nothing to show for this ID; say so and move on to the next one.
    if subset.empty:
        print(f"No observations of stop_id {bad_stop}")
        continue

    display(subset)
    display(Markdown(f"### Stop ID:  {bad_stop}"))

    make_map(subset, "stop_id")

## Check HQTA points

In [None]:
# Re-read the HQTA points dataset so this section starts from a fresh copy
# of the file rather than whatever `gdf` was left as by earlier cells.
gdf = gpd.read_parquet(f"{GCS_FILE_PATH}hqta_points.parquet")

In [None]:
def check_for_missing(gdf):
    """Report value counts and null rows for the HQTA label columns.

    For each of `hqta_type` and `hqta_details`, prints the column name and
    its `value_counts()`, then displays the rows of `gdf` where that column
    is null.

    Parameters
    ----------
    gdf : GeoDataFrame or DataFrame
        Table containing `hqta_type` and `hqta_details` columns.

    Returns
    -------
    None
        Output is printed / displayed only.
    """
    for col in ("hqta_type", "hqta_details"):
        print(f"column: {col}")
        print(gdf[col].value_counts())

        # An empty table here means the column has no missing values.
        is_missing = gdf[col].isna()
        display(gdf[is_missing])


check_for_missing(gdf)

In [None]:
# Distinct HQTA types present in the points dataset; reused by later cells.
hq_types = gdf.hqta_type.unique().tolist()

# Draw one map per HQTA type, coloured by `hqta_details`.
for hq_type in hq_types:
    subset = gdf[gdf.hqta_type == hq_type]

    # make_map() renders the map itself and returns None, so its result is
    # neither captured nor passed to display() -- doing so would print a
    # stray `None` under every map.
    make_map(subset, "hqta_details")

## Check HQTA polygons

In [None]:
# Rebind `gdf` to the HQTA areas (polygons) dataset and run the same
# missing-value check on it that was run on the points dataset.
gdf = gpd.read_parquet(f"{GCS_FILE_PATH}hqta_areas.parquet")

check_for_missing(gdf)    

In [None]:
# Draw one polygon map per HQTA type, coloured by primary operator ID.
# NOTE: `hq_types` was computed from the points dataset in an earlier cell,
# while `gdf` now holds the polygons dataset.
for hq_type in hq_types:
    subset = gdf[gdf.hqta_type == hq_type]

    # make_map() renders the map itself and returns None, so its result is
    # neither captured nor passed to display() -- doing so would print a
    # stray `None` under every map.
    make_map(subset, "calitp_itp_id_primary")