# Choose cut-offs for competitive or viable competitive routes

* What threshold to use for `pct_trips_competitive` to count a route as competitive (based on `bus_multiplier` only)?

In [1]:
import geopandas as gpd
import intake
import pandas as pd

import E2_aggregated_route_stats as aggregated_route_stats 

# Open an intake catalog from the YAML file(s) matching ./*.yml.
# The path is relative, so this resolves against the current working directory.
catalog = intake.open_catalog("./*.yml")



In [2]:
# Load the `competitive_route_variability` catalog entry into memory.
# Later cells read `calitp_itp_id`, `route_id`, `route_group` and
# `pct_trips_competitive` from this table.
df = catalog.competitive_route_variability.read()

In [8]:
# Mean speed from the E2_aggregated_route_stats helper, requested for the
# (calitp_itp_id, route_id) column pair.
# NOTE(review): presumably the list is used as group-by keys and the result has
# one row per key pair -- confirm against the helper's definition.
mean_speed_by_route = aggregated_route_stats.calculate_mean_speed_by_route(
    ["calitp_itp_id", "route_id"])

In [11]:
# Outer-join the mean speeds onto `df` so rows unmatched on either side are kept.
# `merge_speed` records each row's origin (both / left_only / right_only), and
# validate="m:1" makes pandas raise if the right-hand keys are not unique.
m1 = pd.merge(
    left=df,
    right=mean_speed_by_route,
    how="outer",
    on=["calitp_itp_id", "route_id"],
    validate="m:1",
    indicator="merge_speed",
)

In [12]:
# Tally how many rows matched on both sides versus only one side of the merge.
m1["merge_speed"].value_counts()

both          52006
left_only     32250
right_only      279
Name: merge_speed, dtype: int64

In [3]:
# Columns that identify a route, and the distinct route groups present in `df`
# (kept in order of first appearance).
route_cols = [
    "calitp_itp_id",
    "route_id",
    "route_group",
]
route_groups = df["route_group"].unique().tolist()

In [5]:
# Distribution of `pct_trips_competitive` across all routes, one row per
# distinct combination of the route columns and the metric.
print("Overall")
df2 = df[[*route_cols, "pct_trips_competitive"]].drop_duplicates()
print(df2["pct_trips_competitive"].describe())

# Same summary per route group, with extra percentiles between the median and
# the 80th to help locate a cut-off.
for i in route_groups:
    subset = df2.loc[df2["route_group"] == i]
    print(f"Route Group: {i}")
    print(
        subset["pct_trips_competitive"].describe(
            percentiles=[0.25, 0.5, 0.6, 0.7, 0.75, 0.8]
        )
    )

Overall
count    2152.000000
mean        0.319165
std         0.400511
min         0.000000
25%         0.000000
50%         0.071000
75%         0.664000
max         1.000000
Name: pct_trips_competitive, dtype: float64
Route Group: short
count    1452.000000
mean        0.323390
std         0.407897
min         0.000000
25%         0.000000
50%         0.047500
60%         0.243200
70%         0.500000
75%         0.695500
80%         1.000000
max         1.000000
Name: pct_trips_competitive, dtype: float64
Route Group: long
count    272.000000
mean       0.347555
std        0.394540
min        0.000000
25%        0.000000
50%        0.162500
60%        0.313800
70%        0.500000
75%        0.702750
80%        0.946000
max        1.000000
Name: pct_trips_competitive, dtype: float64
Route Group: medium
count    428.000000
mean       0.286787
std        0.377171
min        0.000000
25%        0.000000
50%        0.074500
60%        0.167000
70%        0.380700
75%        0.502500
80% 

* For short routes, `pct_trips_competitive = 0.5` falls at the 70th percentile.
* For medium routes, `pct_trips_competitive = 0.5` falls at roughly the 75th percentile.
* For long routes, `pct_trips_competitive = 0.5` falls at the 70th percentile.