In [24]:
%%capture
import warnings
warnings.filterwarnings('ignore')
import calitp_data_analysis.magics
# from update_vars_index import ANALYSIS_DATE

import speedmap_utils
import pandas as pd
import geopandas as gpd
import numpy as np
from siuba import *
import shared_utils
# Load the shared 'gtfs_analytics_data' catalog; the segment readers below take
# their parquet directory and file stem from its `speedmap_segments` entry.
catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data')

## https://github.com/cal-itp/data-analyses/issues/1306

In [5]:
## parameters cell
# Values matched against the `organization_source_record_id` column when
# reading speedmap segments.
organization_source_record_ids = [
    'rec4pgjrmdhCh4z01',
    'rec8zhnCPETu6qEiH',
    'recPnGkwdpnr8jmHB',
    'recvzE9NXgGMmqcTH',
]

In [7]:
# Month keys for January through November 2024. The following cell rebinds
# `dates` to the matching entries of shared_utils.rt_dates.DATES.
dates = [
    f'{month}2024'
    for month in ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
                  'jul', 'aug', 'sep', 'oct', 'nov')
]

In [8]:
# Swap each month key for its entry in shared_utils.rt_dates.DATES.
# NOTE: this rebinds `dates` in place, so re-running this cell on its own would
# index DATES with the already-converted values rather than the month keys.
dates = [shared_utils.rt_dates.DATES[month_key] for month_key in dates]

In [25]:
def read_segments(
    organization_source_record_ids: list,
    analysis_date,
    route_short_names=('232', '109', '438', '13'),
    max_fast_slow_ratio=3,
) -> gpd.GeoDataFrame:
    '''
    Read the detailed speedmap segments for one analysis date, filtered to the
    given organizations and routes, and add a `fast_slow_ratio` column.

    State Highway Network (SHN) data is not loaded or returned here.

    Parameters
    ----------
    organization_source_record_ids : list
        Values matched against the `organization_source_record_id` column.
    analysis_date
        Suffix of the parquet file to read; also stored in the `date` column.
    route_short_names : iterable of str, optional
        Values matched against the `route_short_name` column. Defaults to the
        four routes this notebook was written for.
    max_fast_slow_ratio : number, optional
        Value substituted where p80_mph / p20_mph evaluates to +inf.

    Returns
    -------
    gpd.GeoDataFrame
        The filtered segments with `date` and `fast_slow_ratio` added, after
        `.round(1)` has been applied to the frame.

    Raises
    ------
    AssertionError
        If any column other than `route_short_name` contains a null.
    '''
    path = (
        f'{catalog.speedmap_segments.dir}'
        f'{catalog.speedmap_segments.shape_stop_single_segment_detail}'
        f'_{analysis_date}.parquet'
    )
    speedmap_segs = gpd.read_parquet(
        path,
        filters=[
            ['organization_source_record_id', 'in', organization_source_record_ids],
            ['route_short_name', 'in', list(route_short_names)],
        ],
    )  #  aggregated

    # Every column except route_short_name is expected to be fully populated.
    non_route_cols = speedmap_segs.drop(columns=['route_short_name'])
    assert not non_route_cols.isna().any().any(), 'no cols besides route_short_name should be nan'

    speedmap_segs['date'] = analysis_date

    #  TODO move upstream and investigate
    # A zero p20_mph with a positive p80_mph divides to +inf; cap those rows.
    ratio = speedmap_segs.p80_mph / speedmap_segs.p20_mph
    speedmap_segs['fast_slow_ratio'] = ratio.replace(np.inf, max_fast_slow_ratio)

    return speedmap_segs.round(1)

In [26]:
# Single-date read using the first entry of `dates`.
gdf = read_segments(
    organization_source_record_ids=organization_source_record_ids,
    analysis_date=dates[0],
)

In [27]:
# Read every date first and concatenate once; calling pd.concat inside the loop
# re-copies the accumulated rows on each iteration.
line_frames = []
for date in dates:
    print(date)
    line_frames.append(read_segments(organization_source_record_ids, date))

# Reversed so the last date read comes first, i.e. the same row order as
# prepending each date's frame in turn. Fall back to an empty frame when
# `dates` is empty, since pd.concat refuses an empty list.
lines = pd.concat(line_frames[::-1]) if line_frames else gpd.GeoDataFrame()

2024-01-17
2024-02-14
2024-03-13
2024-04-17
2024-05-22
2024-06-12
2024-07-17
2024-08-14
2024-09-18
2024-10-16
2024-11-13


In [29]:
def read_process_segments(
    organization_source_record_ids: list,
    analysis_date,
    route_short_names=('232', '109', '438', '13'),
) -> gpd.GeoDataFrame:
    '''
    Read the detailed speedmap segments for one analysis date, filtered to the
    given organizations and routes, and pass them through
    `speedmap_utils.prepare_segment_gdf`.

    `prepare_segment_gdf` is defined outside this notebook, so the exact
    processing it applies is not visible here. State Highway Network (SHN)
    data is not loaded or returned.

    Parameters
    ----------
    organization_source_record_ids : list
        Values matched against the `organization_source_record_id` column.
    analysis_date
        Suffix of the parquet file to read; also stored in the `date` column
        before the frame is handed to `prepare_segment_gdf`.
    route_short_names : iterable of str, optional
        Values matched against the `route_short_name` column. Defaults to the
        four routes this notebook was written for.

    Returns
    -------
    gpd.GeoDataFrame
        Whatever `speedmap_utils.prepare_segment_gdf` returns for the filtered
        segments (annotated as a GeoDataFrame).

    Raises
    ------
    AssertionError
        If any column other than `route_short_name` contains a null.
    '''
    path = (
        f'{catalog.speedmap_segments.dir}'
        f'{catalog.speedmap_segments.shape_stop_single_segment_detail}'
        f'_{analysis_date}.parquet'
    )
    speedmap_segs = gpd.read_parquet(
        path,
        filters=[
            ['organization_source_record_id', 'in', organization_source_record_ids],
            ['route_short_name', 'in', list(route_short_names)],
        ],
    )  #  aggregated

    # Every column except route_short_name is expected to be fully populated.
    non_route_cols = speedmap_segs.drop(columns=['route_short_name'])
    assert not non_route_cols.isna().any().any(), 'no cols besides route_short_name should be nan'

    speedmap_segs['date'] = analysis_date
    return speedmap_utils.prepare_segment_gdf(speedmap_segs)

In [30]:
# Read every date first and concatenate once; calling pd.concat inside the loop
# re-copies the accumulated rows on each iteration.
polygon_frames = []
for date in dates:
    print(date)
    polygon_frames.append(read_process_segments(organization_source_record_ids, date))

# Reversed so the last date read comes first, i.e. the same row order as
# prepending each date's frame in turn. Fall back to an empty frame when
# `dates` is empty, since pd.concat refuses an empty list.
polygons = pd.concat(polygon_frames[::-1]) if polygon_frames else gpd.GeoDataFrame()

2024-01-17
2024-02-14
2024-03-13
2024-04-17
2024-05-22
2024-06-12
2024-07-17
2024-08-14
2024-09-18
2024-10-16
2024-11-13


In [32]:
# Write the concatenated `polygons` frame to a file in the working directory.
# Presumably the GeoJSON format is inferred from the file extension — TODO confirm.
polygons.to_file('hermosa_speedmap_polygons.geojson')

In [33]:
# Write the concatenated `lines` frame to a file in the working directory.
# Presumably the GeoJSON format is inferred from the file extension — TODO confirm.
lines.to_file('hermosa_speedmap_lines.geojson')

In [37]:
def write_gz(gdf, path):
    '''
    Serialize `gdf` with its `to_json()` method and write the result to `path`
    as gzip-compressed UTF-8 bytes.

    Parameters
    ----------
    gdf
        Object exposing `to_json()` that returns a string.
    path
        Destination file; opened in binary write mode, so an existing file is
        overwritten.

    Returns
    -------
    None
    '''
    payload = gdf.to_json().encode("utf-8")
    # Both handles are closed on exit, gzip stream first, then the raw file.
    with open(path, "wb") as raw_file, gzip.GzipFile(fileobj=raw_file, mode="w") as gz_stream:
        gz_stream.write(payload)
    

In [38]:
import gzip

In [39]:
# Gzip-compressed JSON export of the `polygons` frame.
write_gz(gdf=polygons, path='hermosa_speedmap_polygons.geojson.gz')

In [40]:
# Gzip-compressed JSON export of the `lines` frame.
write_gz(gdf=lines, path='hermosa_speedmap_lines.geojson.gz')