# HQTA Map Checks

Run some sanity checks and draw maps of the intermediate outputs to make sure everything looks reasonable.

In [1]:
import geopandas as gpd
import pandas as pd

import utilities 
from shared_utils import geography_utils



### After `B2_combine_operator_corridors`

In [2]:
# Path for the "all_bus" dataset; presumably resolved from the project's data
# catalog by the local `utilities` module -- confirm there.
ALL_BUS = utilities.catalog_filepath("all_bus")

# Load the parquet file at that path with geopandas.
hqta_all_operators = gpd.read_parquet(ALL_BUS)

In [3]:
# Attributes that identify one dissolved shape: operator, HQ-corridor flag, route.
keep_cols = ["calitp_itp_id", "hq_transit_corr", "route_id"]

# Merge all geometries sharing the same key columns into a single geometry,
# then move the key columns back out of the index into regular columns.
dissolved = (
    hqta_all_operators[[*keep_cols, "geometry"]]
    .dissolve(by=keep_cols)
    .reset_index()
)

dissolved.head()

Unnamed: 0,calitp_itp_id,hq_transit_corr,route_id,geometry
0,4,False,10,"MULTIPOLYGON (((-184617.119 -35169.116, -18462..."
1,4,False,12,"MULTIPOLYGON (((-199990.480 -22174.364, -19998..."
2,4,False,14,"MULTIPOLYGON (((-197401.823 -22481.392, -19740..."
3,4,False,18,"MULTIPOLYGON (((-199560.444 -18980.881, -19955..."
4,4,False,19,"POLYGON ((-198503.848 -24333.188, -198486.437 ..."


In [4]:
COUNTY_URL = "https://opendata.arcgis.com/datasets/8713ced9b78a4abb97dc130a691a8695_0.geojson"

# County boundaries, reprojected to the CRS constant from geography_utils.
# NOTE(review): the spatial joins below assume `dissolved` is already in this
# same CRS -- confirm.
counties = gpd.read_file(COUNTY_URL).to_crs(geography_utils.CA_NAD83Albers)

bay_area_counties = [
    "Alameda", "Contra Costa",
    "Marin", "Napa",
    "San Francisco", "San Mateo", "Santa Clara",
    "Solano", "Sonoma",
]


def corridors_in_counties(corridors, county_boundaries, county_names):
    """Return the rows of `corridors` that intersect any of the named counties.

    Parameters
    ----------
    corridors : geopandas.GeoDataFrame
        Geometries to filter (left side of the spatial join).
    county_boundaries : geopandas.GeoDataFrame
        County polygons with at least `COUNTY_NAME` and `geometry` columns.
    county_names : list of str
        Values of `COUNTY_NAME` to keep.

    Returns
    -------
    geopandas.GeoDataFrame
        Inner spatial join of `corridors` with the selected counties. The
        matched `COUNTY_NAME` is attached to each row; a corridor that
        intersects several of the selected counties appears once per county.
    """
    selected = county_boundaries[
        county_boundaries.COUNTY_NAME.isin(county_names)
    ][["COUNTY_NAME", "geometry"]]

    return gpd.sjoin(
        corridors,
        selected,
        how="inner",
        predicate="intersects",
    )


hqta_in_bay = corridors_in_counties(dissolved, counties, bay_area_counties)

hqta_in_la = corridors_in_counties(dissolved, counties, ["Los Angeles"])

### Bay Area Map

In [5]:
# Map call left commented out; the static image below appears to stand in for
# its output (TODO confirm). Uncomment to draw the map from `hqta_in_bay`.
#utilities.map_hqta(hqta_in_bay)

#### Static Image

![map](img/bay.png)

### Los Angeles Map

In [6]:
# Map call left commented out; the static image below appears to stand in for
# its output (TODO confirm). Uncomment to draw the map from `hqta_in_la`.
#utilities.map_hqta(hqta_in_la)

#### Static Image

![map](img/la.png)

## Result

* Ran and aggregated for nearly all bus operators statewide
    * Segments that contain no stops will not appear as HQTA; we may need to interpolate across them (e.g., freeway segments)
    * Some questionable short segments
* Algorithm may be overestimating for SFMTA

### Data Issues

* 61 County Connection has a lot of NA departure times?
    * strange results after `dropna`
* 48 B-Line is similarly choppy
* 116 Fresno Area Express has whitespace in departure times
    * `ValueError: time data ' 7:04:00' does not match format '%H:%M:%S'`

