In [1]:
import datetime
import dask.dataframe as dd
import numpy as np
import geopandas as gpd
import pandas as pd
import altair as alt
from segment_speed_utils import gtfs_schedule_wrangling, helpers, segment_calcs,sched_rt_utils
from segment_speed_utils.project_vars import (
    COMPILED_CACHED_VIEWS,
    PROJECT_CRS,
    SEGMENT_GCS,
    
    analysis_date,
)
from scripts import (A1_sjoin_vp_segments, A2_valid_vehicle_positions)
from shared_utils import calitp_color_palette as cp, rt_utils, geography_utils
import _threshold_utils as threshold_utils

CONFIG_PATH = './scripts/config.yml'
STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")

In [3]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

## Sample segments and routes for Big Blue Bus

In [6]:
test_operator = "Big Blue Bus VehiclePositions"
test_org = "City of Santa Monica"
test_key = "6c2d7daaf979779fa2089c6395baf98b"

In [7]:
pub_df = pd.read_parquet(
    f"{SEGMENT_GCS}export/avg_speeds_stop_segments_{analysis_date}_tabular.parquet", 
    filters = [[("agency", "==", test_org)]]
)

In [8]:
# Dark orange
shape_id1  = "26375"
stop_seq1 = 7
shape_array1 = pub_df[pub_df.shape_id==shape_id1].shape_array_key.iloc[0]

In [9]:
# Light yellow 
shape_id2 = "26342"
stop_seq2 = 23
shape_array2 = pub_df[pub_df.shape_id==shape_id2].shape_array_key.iloc[0]

In [10]:
# Dark Red
shape_id3 = "26393"
stop_seq3 = 32
shape_array3 = pub_df[pub_df.shape_id==shape_id3].shape_array_key.iloc[0]

In [11]:
# Light orange
shape_id4 = "26372"
stop_seq4 = 14
shape_array4 = pub_df[pub_df.shape_id==shape_id4].shape_array_key.iloc[0]

In [12]:
# one_route_map(avg_speeds,shape_array4)

## Visualizing Speed
* https://nbviewer.org/github/cal-itp/data-analyses/blob/filter-speeds-avgs/rt_segment_speeds/18_speed_distribution.ipynb
* https://analysis.calitp.org/rt/district_07-los-angeles/9__speedmaps__district_07-los-angeles__itp_id_300.html

In [13]:
STG5_FILE = DICT['stage5']
avg_speeds = gpd.read_parquet(f"{SEGMENT_GCS}{STG5_FILE}_{analysis_date}.parquet")
avg_speeds = avg_speeds.drop(columns=["district", "district_name"])

In [14]:
STG4_FILE = DICT['stage4']
# Open up speeds
speeds = pd.read_parquet(f"{SEGMENT_GCS}{STG4_FILE}_{analysis_date}")

In [15]:
def one_route_map(avg_speeds:gpd.GeoDataFrame, shape_array_key:str):
    display(avg_speeds[avg_speeds.shape_array_key==shape_array_key].explore(
    "p50_mph", 
    tiles = "CartoDB Positron",
    cmap = rt_utils.ZERO_THIRTY_COLORSCALE,
    style_kwds = {'weight':5}))
    

### Charting

In [16]:
def speeds_one_trip_seq(shape_array_key:str, stop_sequence:int):
    STG4_FILE = DICT['stage4']
    df = pd.read_parquet(f"{SEGMENT_GCS}{STG4_FILE}_{analysis_date}", 
         filters = [[("shape_array_key", "==", shape_array_key),
                    ("stop_sequence", "==", stop_sequence)]]) 
    return df

In [17]:
def avg_speeds_one_trip_seq(shape_array_key:str, stop_sequence:int):
    STG5_FILE = DICT['stage5']
    df = pd.read_parquet(f"{SEGMENT_GCS}{STG5_FILE}_{analysis_date}.parquet", 
         filters = [[("shape_array_key", "==", shape_array_key),
                    ("stop_sequence", "==", stop_sequence),
                    ("time_of_day", "==", "all_day")]])
    df = df.drop(columns=["district", "district_name", "geometry"])
    return df

In [18]:
def merge_avg_all_speeds(shape_array_key:str, stop_sequence:int):
    speeds = speeds_one_trip_seq(shape_array_key, stop_sequence)
    avg = avg_speeds_one_trip_seq(shape_array_key, stop_sequence)
    m1 = pd.merge(speeds,avg, on =['shape_array_key','stop_sequence','gtfs_dataset_key'], how = "inner")
    return m1

#### First Version

In [19]:
def display_speeds(shape_array_key:str, stop_sequence:int):
    m1 = merge_avg_all_speeds(shape_array_key,stop_sequence)
    
    # Fill any nan values
    m1= m1.fillna(0)
    
    # Create chart title 
    chart_title = f"Speed Distribution for Seq {m1.stop_sequence.iloc[0]}/Shape {m1.shape_array_key.iloc[0]}"
    
    # Main chart
    chart = (alt.Chart(m1)
         .mark_bar()
         .encode(x='trip_id', y='speed_mph',tooltip = m1.columns.tolist())
         .properties(title = chart_title))
    
    # Rule
    rule1 = alt.Chart(m1).mark_rule(color='red', strokeDash=[10, 7]).encode(y='p20_mph')
    rule2 = alt.Chart(m1).mark_rule(color='blue', strokeDash=[10, 7]).encode(y='p80_mph')
    rule3 = alt.Chart(m1).mark_rule(color='green', strokeDash=[10, 7]).encode(y='p50_mph')
    chart = threshold_utils.chart_size((chart+rule1+rule2+rule3), 600,300)
    chart = chart.interactive()
    display(chart)

In [20]:
display_speeds(shape_array2, stop_seq2)

In [21]:
display_speeds(shape_array3, stop_seq3)

In [22]:
display_speeds(shape_array1, stop_seq1)

#### Second version

In [23]:
def speeds_one_route(shape_array_key:str):
    STG4_FILE = DICT['stage4']
    df = pd.read_parquet(f"{SEGMENT_GCS}{STG4_FILE}_{analysis_date}", 
         filters = [[("shape_array_key", "==", shape_array_key)]]) 
    return df

In [85]:
def avg_speeds_one_route(shape_array_key:str):
    STG5_FILE = DICT['stage5']
    df = pd.read_parquet(f"{SEGMENT_GCS}{STG5_FILE}_{analysis_date}.parquet", 
         filters = [[("shape_array_key", "==", shape_array_key),
                    ("time_of_day", "==", "all_day")]])
    df = df.drop(columns=["district", "district_name", "geometry"])
    return df

In [86]:
m2 = merge_avg_all_speeds(shape_array2,stop_seq2)

In [87]:
rule1 = alt.Chart(m2).mark_rule(color='red', strokeDash=[10, 7]).encode(y='p20_mph')

In [88]:
m2.speed_mph = m2.speed_mph.fillna(0)

In [89]:
bins = [0, 5, 10, 15, 25, 30, 35, 40, 45, 50, 55, 60, 65]

In [90]:

m2["binned"] = pd.cut(m2.speed_mph, bins).astype(str)

In [91]:
m2.binned = m2.binned.str.replace('nan','(0, 5]')

In [93]:
m2 = (m2
      .groupby(['binned'])
      .agg({'trip_id':'count'})
      .reset_index()
      .rename(columns = {'trip_id':'number_of_trips'})
     )

In [94]:
chart2 = (alt.Chart(m2)
    .mark_bar(size=40)
    .encode(
        x=alt.X('binned'),
        y=alt.Y('number_of_trips'),
        tooltip=m2.columns.tolist(),
    )
    .properties(title='Test')
    .interactive())

In [95]:
threshold_utils.chart_size(chart2, 400, 300)

### % of Meters

In [96]:
def import_segments(shape_array_key:str, 
                    gtfs_key:str) -> gpd.GeoDataFrame:
    """
    Args:
        flagged_df: result from df from categorize_meters_speeds_pandas()
    """
    # Load in ALL segments, flag them.
    FILE = STOP_SEG_DICT['segments_file']
    gdf = gpd.read_parquet(f"{SEGMENT_GCS}{FILE}_{analysis_date}.parquet",
                           filters = [[("shape_array_key", "==", shape_array_key),
                                      ("gtfs_dataset_key", "==", gtfs_key),
                                     ]]).to_crs(PROJECT_CRS)
    
    gdf = gdf.assign(
        meters_length=(gdf.geometry.to_crs(geography_utils.CA_NAD83Albers).length)
    )
    
    df = gdf.drop(columns = ['geometry','geometry_arrowized'])
    
    return df

In [97]:
seg2 = import_segments(shape_array2,test_key)

In [98]:
speeds2.sample()

NameError: name 'speeds2' is not defined

In [None]:
m2_segs_test = pd.merge(seg2, speeds2, on = ['shape_array_key','gtfs_dataset_key','stop_sequence'], how = "inner")

In [None]:
m2_segs_test['percent'] = m2_segs_test.meters_elapsed/m2_segs_test.meters_length

In [None]:
len(m2_segs_test)

In [None]:
len(m2_segs_test[m2_segs_test.percent <=.50])

In [None]:
len(m2_segs_test[m2_segs_test.percent >.50])

In [None]:
m2_segs_test[m2_segs_test.percent <=.50][['percent','meters_elapsed','meters_length']].sort_values(['percent'])