In [None]:
import geopandas as gpd
import pandas as pd
from siuba import *

# Ad-hoc Transit Priority Rankings

* unwieldy given the combination of overlapping corridors, temporal differences, and external metrics (accessibility + ridership)
* we don't currently expect to repeat this process; if we do, we should find a way to streamline it
* accessibility data via Conveyal and an external post-processing tool; the resulting csv also includes manually researched ridership numbers

# project sources

## dropped/excluded

* US 50 (withdrawn)
* US 101 Marin (minimal accessibility/speed benefits, worthwhile project better analyzed in another context)
* MST reroute (apparent marginal/negative benefits, institutional barriers to relocating/adding service)

# read and aggregate

## original metrics from two cycles of engagement with districts

In [None]:
# Round-one corridor metrics, minus the corridors that are no longer considered.
first_set = (
    gpd.read_file('./june16_corridor_metrics_speeds.geojson')
    >> filter(-_.corridor.str.contains('us50'))  # dropped by the district
    >> filter(-_.corridor.str.contains('us101_lombard'))  # already has an HOV lane
)
# Tag every row so the two rounds stay distinguishable after they are stacked.
first_set['submission_round'] = 1

In [None]:
# Write the round-one metrics, without the geometry column, to pending_accessibility.csv.
(first_set >> select(-_.geometry)).to_csv('pending_accessibility.csv')

In [None]:
# Load the second-round corridor metrics and tag every row as submission round 2.
second_set = gpd.read_file('./sep9_corridor_metrics_speeds.geojson')
second_set['submission_round'] = 2

## combine bbb and Metro

In [None]:
# Load the separately stored Big Blue Bus metrics for SR-2.
smbbb = gpd.read_parquet('./sep9_big_blue_bus_sr2.parquet')

In [None]:
# Drop the first four characters of every Big Blue Bus corridor name
# (presumably an operator prefix, so the names line up with second_set for the
# groupby on 'corridor' -- TODO confirm against the parquet contents).
# The vectorised .str slice leaves missing names as NaN instead of raising.
# NOTE: not idempotent -- each run strips another four characters, so reload
# smbbb before re-running this cell.
smbbb.corridor = smbbb.corridor.str[4:]

In [None]:
# Second-round rows whose corridor name contains 'sr2' (the Metro figures, going by the variable name).
metro = second_set >> filter(_.corridor.str.contains('sr2'))

In [None]:
# Keep only the corridor key and the three metrics that get summed across operators.
df1 = metro >> select(_.corridor, _.total_speed_delay, _.n_trips, _.trips_added)
df2 = smbbb >> select(_.corridor, _.total_speed_delay, _.n_trips, _.trips_added)

In [None]:
# Stack both operators' rows and total each metric per corridor.
grouped = pd.concat([df1, df2]).groupby('corridor').sum().reset_index()

In [None]:
# Display the combined per-corridor totals.
grouped

In [None]:
# SR-2 rows of the second set with the three summed metrics removed, ready to receive the combined totals.
sr2_all = second_set >> filter(_.corridor.str.contains('sr2')) >> select(-_.total_speed_delay, -_.n_trips, -_.trips_added)

In [None]:
# Attach the combined totals; the inner join keeps only corridors present in both frames.
sr2_all = sr2_all >> inner_join(_, grouped, on = 'corridor')

In [None]:
# Every second-round row that is not an SR-2 corridor.
no_sr2 = second_set >> filter(-_.corridor.str.contains('sr2'))

In [None]:
# Rebuild second_set from the untouched rows plus the SR-2 rows carrying combined totals.
# NOTE: this overwrites second_set, so re-running the SR-2 cells without reloading it
# would add the Big Blue Bus figures a second time.
second_set = pd.concat([no_sr2, sr2_all])

In [None]:
# Load the accessibility results (per the notes in this notebook, this CSV also carries ridership).
access_results = pd.read_csv('./AccessibilityResults.csv')

In [None]:
# Preview the first three round-one rows.
first_set >> head(3)

In [None]:
# Preview the first three round-two rows.
second_set >> head(3)

In [None]:
# Stack both submission rounds into a single frame.
all_corridors = pd.concat([first_set, second_set])

In [None]:
# One row per (corridor, organization, submission_round):
#   sum_trips         = sum of n_trips over the group
#   total_delay       = largest total_speed_delay in the group
#   avg_delay_minutes = total_delay / sum_trips
# NOTE(review): trips are summed while delay takes the max -- confirm that
# total_speed_delay is already a corridor-wide figure repeated on each row.
all_grouped = (all_corridors >> group_by(_.corridor, _.organization, _.submission_round)  # length and span cause fan-out, re-add later...
               >> summarize(sum_trips =_.n_trips.sum(), total_delay = _.total_speed_delay.max())
               >> mutate(avg_delay_minutes = _.total_delay / _.sum_trips)
)

## note: don't sum/aggregate accessibility results within a corridor

In [None]:
# Attach accessibility results by corridor and submission round; the left join keeps corridors without a match.
rider_access_joined = all_grouped >> left_join(_, access_results, on=['corridor', 'submission_round'])

# Adding ridership (added to accessibility csv)

## apportioning ridership for partial-hour bus lane on sr2

* evenly during daytime hours (0500 - 2200), match post-covid trends of less peaking
* 22,669 daily riders for the entire corridor
* 

## notes
* for SDMTS, sum ridership from routes 60, 235, 225

In [None]:
# Daily ridership figures used to apportion SR-2 riders across time-of-day segments.
metro_4_ridership = 22669
bbb_1_ridership = 5747

# Riders are spread evenly over the daytime service window (0500-2200).
service_start_hour, service_end_hour = 5, 22
combined_daily_riders = metro_4_ridership + bbb_1_ridership
riders_per_hour = combined_daily_riders / (service_end_hour - service_start_hour)

In [None]:
# Estimate daily riders for each SR-2 segment as its span in hours times the
# flat hourly rate, keeping only the join keys and the estimate.
rider_estimates = (second_set >> filter(_.corridor.str.contains('sr2'))
 >> select(_.corridor, _.span_hours, _.submission_round)
 >> mutate(daily_riders = _.span_hours * riders_per_hour)
 >> select(-_.span_hours)
)
# Display the estimates for inspection.
rider_estimates

In [None]:
# Attach the SR-2 estimates; the next cell reads the resulting daily_riders_x / daily_riders_y pair.
rider_access_joined = rider_access_joined >> left_join(_, rider_estimates, on =['corridor', 'submission_round'])

In [None]:
# Use the left-hand value (_x) where present; fall back to the SR-2 estimate (_y) where it is missing.
rider_access_joined['daily_riders'] = rider_access_joined.daily_riders_x.fillna(rider_access_joined.daily_riders_y)

In [None]:
# Drop the two suffixed helper columns now that daily_riders holds the merged value.
rider_access_joined = rider_access_joined >> select(-_.daily_riders_x, -_.daily_riders_y)

# Create defined projects from disaggregated sr2

In [None]:
# Second-round SR-2 rows: the segments that get combined into defined projects below.
df = rider_access_joined >> filter(_.corridor.str.contains('sr2'), _.submission_round == 2)

In [None]:
# Display the SR-2 segment rows.
df

In [None]:
def df_combined(filtered_df, corridor):
    """Collapse several corridor rows into one combined-project row.

    ``sum_trips``, ``total_delay`` and ``daily_riders`` are summed across the
    rows, ``avg_delay_minutes`` is recomputed as summed delay divided by summed
    trips, and ``corridor`` is replaced by the supplied name. Every other
    column takes its value from the first row of ``filtered_df``.

    Parameters
    ----------
    filtered_df : pandas.DataFrame
        Rows to combine. Must contain ``sum_trips``, ``total_delay``,
        ``daily_riders``, ``avg_delay_minutes`` and ``corridor``.
    corridor : str
        Name to give the combined row.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with index label 99. Recomputed columns come first,
        followed by the carried-over columns in their original order.

    Raises
    ------
    ValueError
        If ``filtered_df`` has no rows.
    """
    if len(filtered_df) == 0:
        # Without this guard the failure is an opaque IndexError from .iloc[0].
        raise ValueError(f"no rows to combine for corridor {corridor!r}")

    summed_cols = ['sum_trips', 'total_delay', 'daily_riders']
    totals = filtered_df[summed_cols].sum()
    # Recompute from the totals; averaging the per-row averages would ignore trip counts.
    totals['avg_delay_minutes'] = totals.total_delay / totals.sum_trips
    totals['corridor'] = corridor

    # All remaining columns are carried over from the first row only.
    recomputed_cols = summed_cols + ['avg_delay_minutes', 'corridor']
    carried_over = filtered_df.drop(columns=recomputed_cols).iloc[0, :]

    merged_dict = totals.to_dict() | carried_over.to_dict()
    # 99 is an arbitrary placeholder label for the single combined row.
    return pd.DataFrame(merged_dict, index=[99])

In [None]:
# Combine every second-round SR-2 segment into one 'sr2_all_day' project row.
all_day_df = df_combined(df, 'sr2_all_day')

In [None]:
# Combine only the AM and PM segments into one 'sr2_peak' project row.
peak_only_df = df_combined(df >> filter(_.corridor.isin(['sr2_am', 'sr2_pm'])), 'sr2_peak')

In [None]:
# Append the two combined SR-2 project rows and renumber the index from zero.
with_sr2 = pd.concat([rider_access_joined, all_day_df, peak_only_df]).reset_index(drop=True)

In [None]:
# Time-of-day SR-2 segments that the combined project rows replace.
sr2_precursors = [
    'sr2_am',
    'sr2_pm',
    'sr2_early_am',
    'sr2_eve',
    'sr2_mid',
]

In [None]:
# Remove the individual SR-2 segments so only the combined SR-2 projects are evaluated.
eval_df = with_sr2 >> filter(-_.corridor.isin(sr2_precursors))

# Testing metrics

In [None]:
# List the columns available for scoring.
eval_df.columns

In [None]:
# Derive rider_minutes_delay (average delay per trip times daily riders) and
# keep only the identifying columns and the metrics used for scoring.
# weighted_avg_change_access_POIs is selected and then dropped again by the
# final select, so it does not appear in the result.
eval_df = (eval_df >> mutate(rider_minutes_delay = _.avg_delay_minutes * _.daily_riders)
         >> select(_.corridor, _.organization, _.submission_round,
                   _.weighted_avg_change_access_Jobs, _.weighted_avg_change_access_POIs,
                   _.avg_pct_chng_access,
                   _.pct_li_sub50k, _.daily_riders, _.rider_minutes_delay
                  )
         >> select(-_.weighted_avg_change_access_POIs)
)

In [None]:
# Show the corridors that still have no ridership figure.
eval_df >> filter(_.daily_riders.isna())

In [None]:
# Drop every row with a missing value in any remaining column (not only daily_riders).
eval_df = eval_df.dropna()

In [None]:
# Rows whose corridor name contains 'sr1_19th'; they are merged into a single corridor below.
nineteenth = eval_df >> filter(_.corridor.str.contains('sr1_19th'))

In [None]:
# Give every sr1_19th row the summed rider-minutes of delay across its segments.
nineteenth = nineteenth.assign(
    rider_minutes_delay=nineteenth.rider_minutes_delay.sum()
)

In [None]:
# Relabel every segment row with the shared corridor name.
nineteenth = nineteenth.assign(corridor='sr1_19th')

In [None]:
# Swap the sr1_19th segment rows for a single row. Only the first segment row is
# kept, so every column other than rider_minutes_delay and corridor carries that
# segment's values.
eval_df = pd.concat([eval_df >> filter(-_.corridor.str.contains('sr1_19th')), nineteenth.iloc[:1,:]])

In [None]:
# eval_df

# Draft ranking

In [None]:
from scipy import stats

# Percentile rank of each corridor's value among all scored corridors, per metric.
for raw_col, pctile_col in (
    ('avg_pct_chng_access', 'pctile_access'),
    ('daily_riders', 'pctile_rider'),
    ('rider_minutes_delay', 'pctile_rider_delay'),
):
    eval_df[pctile_col] = eval_df[raw_col].map(
        lambda score, reference=eval_df[raw_col]: stats.percentileofscore(reference, score)
    )

In [None]:
# Unweighted sum of the three percentile ranks.
eval_df['total_score'] = eval_df.pctile_access + eval_df.pctile_rider + eval_df.pctile_rider_delay
# eval_df >> arrange(-_.total_score)

In [None]:
# Distinct (corridor, length_miles, geometry) combinations from round one.
first_filtered = first_set >> distinct(_.corridor, _.length_miles, _.geometry)

In [None]:
# Same for round two, minus three SR-2 segments. The remaining 'sr2_early_am' and
# 'sr2_pm' rows are renamed further down to stand in for the combined projects.
second_filtered = second_set >> distinct(_.corridor, _.length_miles, _.geometry) >> filter(-_.corridor.isin(['sr2_am', 'sr2_mid', 'sr2_eve']))

In [None]:
# second_filtered.explore()

In [None]:
# Merge geometries that share a corridor name into one row per corridor.
# NOTE(review): confirm how length_miles is aggregated when a corridor has several rows.
second_filtered = second_filtered.dissolve('corridor').reset_index()

In [None]:
# Display the dissolved round-two geometries.
second_filtered

In [None]:
# Relabel the retained SR-2 segments with the names of the combined projects.
second_filtered.corridor = (
    second_filtered.corridor
    .str.replace('sr2_early_am', 'sr2_all_day')
    .str.replace('sr2_pm', 'sr2_peak')
)

In [None]:
# Fold the two sr1_19th segments into a single corridor name.
first_filtered.corridor = (
    first_filtered.corridor
    .str.replace('sr1_19th_a', 'sr1_19th')
    .str.replace('sr1_19th_b', 'sr1_19th')
)

In [None]:
# Merge geometries that share a corridor name (including the renamed sr1_19th segments) into one row each.
# NOTE(review): confirm how length_miles is aggregated for the merged sr1_19th segments.
first_filtered = first_filtered.dissolve('corridor').reset_index()

In [None]:
# Stack the per-corridor geometries from both rounds.
both_filtered = pd.concat([first_filtered, second_filtered])

In [None]:
from calitp_data_analysis import geography_utils

In [None]:
# Attach length_miles and geometry to each scored corridor by corridor name.
with_geom = eval_df >> left_join(_, both_filtered, on = 'corridor')
# Re-wrap the join result as a GeoDataFrame.
# NOTE(review): crs= is passed to the constructor here -- confirm the source
# geometries are already in CA_NAD83Albers.
with_geom = gpd.GeoDataFrame(with_geom, crs=geography_utils.CA_NAD83Albers)

In [None]:
# with_geom.explore()

# Normalized draft ranking

In [None]:
# with_geom = with_geom >> select(-_.pctile_access, -_.pctile_rider, -_.pctile_rider_delay)

In [None]:
# Normalize each metric by corridor length so long corridors are not favoured by size alone.
with_geom = with_geom >> mutate(access_mi = _.weighted_avg_change_access_Jobs / _.length_miles,
                    riders_mi = _.daily_riders / _.length_miles,
                    rider_delay_mi = _.rider_minutes_delay / _.length_miles
                   )

In [None]:
# Percentile rank of each per-mile metric among all scored corridors.
for per_mile_col, pctile_col in (
    ('access_mi', 'pctile_access_norm'),
    ('riders_mi', 'pctile_rider_norm'),
    ('rider_delay_mi', 'pctile_rider_delay_norm'),
):
    with_geom[pctile_col] = with_geom[per_mile_col].map(
        lambda score, reference=with_geom[per_mile_col]: stats.percentileofscore(reference, score)
    )

# Unweighted sum of the three normalized percentile ranks.
with_geom['total_score_norm'] = (
    with_geom.pctile_access_norm
    + with_geom.pctile_rider_norm
    + with_geom.pctile_rider_delay_norm
)
# with_geom >> arrange(-_.total_score_norm)

In [None]:
# List the columns of the scored frame.
with_geom.columns

In [None]:
# Write all scores and intermediate metrics, without geometry, to detailed_scores.csv.
(with_geom >> select(-_.geometry)).to_csv('detailed_scores.csv')

In [None]:
# Compact view: identifying columns, low-income share, length, and both total scores.
summary = with_geom >> select(_.corridor, _.organization, _.submission_round,
                   _.pct_li_sub50k, _.length_miles, _.total_score,
                   _.total_score_norm
                   )

In [None]:
# Show the summary ordered by normalized score, highest first.
summary >> arrange(-_.total_score_norm)

In [None]:
# (with_geom >> filter(_.corridor == 'sr123_san_pablo')).explore(tiles = "CartoDB positron")

In [None]:
# with_geom.explore(tiles = "CartoDB positron")