# Analysis
* Create yml for all my files. 
* Save overlay? 

In [4]:
# Read in zip files
import fsspec
import geopandas as gpd
import intake
import numpy as np
import pandas as pd

# My utilities
import utilities
from calitp import *

# Display
from IPython.display import HTML, Image, Markdown, display, display_html

# Geometry
from shared_utils import geography_utils, utils

In [5]:
# Notebook-wide pandas display settings: up to 100 columns, two-decimal
# floats, and no truncation of rows or of long cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [6]:
# Base GCS folder for this analysis; the provider parquet files are read
# from here by prefixing their file names with this path.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/cellular_coverage/"

In [7]:
# Load the transit routes parquet and pass it through the project helper
# `utilities.unique_routes` (presumably de-duplicates routes -- confirm in
# utilities).
routes_df = utilities.unique_routes(
    gpd.read_parquet(
        "gs://calitp-analytics-data/data-analyses/traffic_ops/ca_transit_routes.parquet"
    )
)

## AT&T

In [8]:
# AT&T provider layer, read from the analysis folder on GCS.
att_df = gpd.read_parquet(f"{GCS_FILE_PATH}att_ca_only.parquet")

In [51]:
def route_cell_coverage(provider_gdf, original_routes_df):
    """
    Compute the share of each route's length that overlaps a provider layer.

    Args:
        provider_gdf: the provider gdf clipped to CA
        original_routes_df: the original df with all the routes. Must carry
            the columns route_id, route_name, agency, itp_id and route_length.
    Returns:
        A frame with one row per (route_id, route_name, agency, itp_id)
        present in the overlay, holding the columns of `original_routes_df`
        plus:
            route_length_overlay: summed route_length from the overlay
            route_length_original_df: route_length from `original_routes_df`
            percentage: route_length_overlay / route_length_original_df * 100
            binned: the 10-point-wide interval that `percentage` falls into

    Notes:
        * Routes that do not appear in the overlay at all are dropped by the
          inner merge, so they are absent from the result rather than
          reported at 0%.
        * Bins are right-closed (pd.cut default), so a percentage of exactly
          0, or anything above 100, falls outside every bin and gets NaN.
    """
    route_keys = ["route_id", "route_name", "agency", "itp_id"]

    # Overlay the routes that were passed in with the provider layer.
    # This used to read the notebook-level `routes_df`, which made the result
    # depend on kernel state instead of on the function's own argument.
    # NOTE(review): assumes utilities.comparison returns the route key columns
    # and a `route_length` for each overlapping piece -- confirm in utilities.
    overlay = utilities.comparison(original_routes_df, provider_gdf)

    # Sum up lengths of routes by route id, name, agency, and itp_id
    overlay_lengths = (
        overlay.groupby(route_keys)
        .agg({"route_length": "sum"})
        .reset_index()
    )

    # Merge the overlay totals back onto the original routes so each row
    # carries both the covered length and the full route length.
    m1 = pd.merge(
        overlay_lengths,
        original_routes_df,
        how="inner",
        on=route_keys,
        suffixes=["_overlay", "_original_df"],
    )

    # Create % of route covered vs. not
    m1["percentage"] = (
        m1["route_length_overlay"] / m1["route_length_original_df"]
    ) * 100

    # Create bins for analysis
    bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    m1["binned"] = pd.cut(m1["percentage"], bins)

    return m1

In [39]:
# Per-route AT&T coverage percentages (and their bins) for all routes.
test_m1 = route_cell_coverage(att_df, routes_df)

In [40]:
# Number of rows in each AT&T coverage bin (ordered by count, not by bin).
test_m1["binned"].value_counts()

(90, 100]    2069
(80, 90]      139
(70, 80]       75
(60, 70]       44
(50, 60]       38
(10, 20]        4
(20, 30]        2
(30, 40]        2
(40, 50]        2
(0, 10]         0
Name: binned, dtype: int64

In [69]:
# Keep only the rows where AT&T covers less than 70% of the route length.
low_att_coverage = test_m1[test_m1["percentage"].lt(70)]

# The same route id + route name can appear under more than one agency.
# After sorting by percentage, then agency, keep the first row of each pair.
low_att_coverage2 = (
    low_att_coverage.sort_values(by=["percentage", "agency"])
    .drop_duplicates(subset=["route_name", "route_id"], keep="first")
    .reset_index(drop=True)
)

In [60]:
# How many de-duplicated routes fall below the 70% AT&T threshold.
len(low_att_coverage2)

52

In [61]:
# Show the low-coverage AT&T routes without the geometry column.
low_att_coverage2.drop(columns = ['geometry'])

Unnamed: 0,route_id,route_name,agency,itp_id,route_length_overlay,route_type,route_length_original_df,percentage,binned
1114,2596,"Carnelian Bay Tahoe Vista, Kings Beach, Crystal Bay, Incline Village",Tahoe Transportation,331,28061.43,3,239898.85,11.7,"(10, 20]"
154,1042,"Guerneville, Monte Rio",Cloverdale Transit,70,36647.6,3,272963.39,13.43,"(10, 20]"
1170,28,"Guerneville, Monte Rio",Sonoma County Transit,314,36647.6,3,272963.39,13.43,"(10, 20]"
752,178,Route between Weaverville and Hayfork. Connects to Redding Line in Douglas City.,Trinity Transit,344,39808.49,3,175452.54,22.69,"(20, 30]"
772,181,Route travels through all down-river communities between Weaverville and Willow Creek. Key route to the coast; connects with RTS service to Arcata/Eureka.,Trinity Transit,344,93602.8,3,313758.23,29.83,"(20, 30]"
1245,30,FSL,Sacramento Regional Transit District,273,19248.17,3,55102.22,34.93,"(30, 40]"
1038,230,The Coaster,Mendocino Transit Authority,198,50692.42,3,139926.61,36.23,"(30, 40]"
1188,288,Redding Line,Trinity Transit,344,122819.29,3,271047.5,45.31,"(40, 50]"
1932,582,Mammoth Lakes HWY 120E/395,Yosemite Area Regional Transportation System,374,290563.68,3,634216.13,45.81,"(40, 50]"
1016,225,South Coast / Ukiah,Mendocino Transit Authority,198,260518.01,3,508378.59,51.24,"(50, 60]"


## Verizon

In [62]:
# Verizon provider layer, read from the analysis folder on GCS.
verizon_df = gpd.read_parquet(f"{GCS_FILE_PATH}verizon_ca_only.parquet")

In [63]:
# Per-route Verizon coverage percentages (and their bins) for all routes.
verizon_m1 = route_cell_coverage(verizon_df, routes_df)

In [64]:
# Number of rows in each Verizon coverage bin (ordered by count, not by bin).
verizon_m1["binned"].value_counts()

(90, 100]    2414
(80, 90]      168
(70, 80]       74
(60, 70]       38
(50, 60]       29
(40, 50]        3
(10, 20]        2
(30, 40]        2
(0, 10]         0
(20, 30]        0
Name: binned, dtype: int64

In [65]:
# Keep only the rows where Verizon covers less than 70% of the route length
# (same threshold as the AT&T section).
low_verizon_coverage = verizon_m1.loc[verizon_m1['percentage'] < 70]

In [70]:

# The same route id + route name can appear under more than one agency.
# After sorting by percentage, then agency, keep the first row of each pair.
low_verizon_coverage2 = (
    low_verizon_coverage.sort_values(by=["percentage", "agency"])
    .drop_duplicates(subset=["route_name", "route_id"], keep="first")
    .reset_index(drop=True)
)

In [68]:
# Show the low-coverage Verizon routes without the geometry column.
low_verizon_coverage2.drop(columns = ['geometry'])

Unnamed: 0,route_id,route_name,agency,itp_id,route_length_overlay,route_type,route_length_original_df,percentage,binned
1114,2596,"Carnelian Bay Tahoe Vista, Kings Beach, Crystal Bay, Incline Village",Tahoe Transportation,331,28061.43,3,239898.85,11.7,"(10, 20]"
1245,30,FSL,Sacramento Regional Transit District,273,19248.17,3,55102.22,34.93,"(30, 40]"
772,181,Route travels through all down-river communities between Weaverville and Willow Creek. Key route to the coast; connects with RTS service to Arcata/Eureka.,Trinity Transit,344,116333.61,3,313758.23,37.08,"(30, 40]"
1879,566,Bridgeport-Gardnerville,Eastern Sierra Transit Authority,99,160964.55,3,342423.4,47.01,"(40, 50]"
2671,Line2,Line 2,Calabasas Transit System,49,46714.89,3,94099.47,49.64,"(40, 50]"
154,1042,"Guerneville, Monte Rio",Cloverdale Transit,70,138345.3,3,272963.39,50.68,"(50, 60]"
1170,28,"Guerneville, Monte Rio",Sonoma County Transit,314,138345.3,3,272963.39,50.68,"(50, 60]"
1932,582,Mammoth Lakes HWY 120E/395,Yosemite Area Regional Transportation System,374,325523.74,3,634216.13,51.33,"(50, 60]"
1016,225,South Coast / Ukiah,Mendocino Transit Authority,198,262268.76,3,508378.59,51.59,"(50, 60]"
385,1292,,Susanville Indian Rancheria Public Transportation Program,329,189518.02,3,366783.15,51.67,"(50, 60]"


## See how routes compare across the two data sets

In [77]:
# Outer-join the Verizon and AT&T low-coverage tables on the route
# identifiers. The `_merge` indicator column records whether a route is in
# the Verizon table only (left_only), the AT&T table only (right_only), or
# both.
m2 = pd.merge(
    left=low_verizon_coverage2,
    right=low_att_coverage2,
    on=["route_id", "route_name", "itp_id", "agency"],
    how="outer",
    suffixes=["_verizon", "_att"],
    indicator=True,
)

In [74]:
# How many routes are low-coverage for both providers vs. only one of them.
m2['_merge'].value_counts()

both          38
right_only    14
left_only      7
Name: _merge, dtype: int64

In [86]:
# Keep only the routes present in both tables. A route that is left_only or
# right_only is under 70% for just one provider, which means the other
# provider's table (percentage < 70) does not contain it.
m3 = m2.loc[m2['_merge'] == 'both']

In [87]:
# Side-by-side Verizon vs. AT&T percentages for routes that are low on both.
# NOTE(review): m3 only holds rows where `_merge` is 'both', so sorting on
# `_merge` does not order the rows by anything meaningful -- consider sorting
# on one of the percentage columns instead.
m3[['route_id', 'route_name', 'agency', 'percentage_verizon', 'percentage_att', '_merge']].sort_values('_merge')

Unnamed: 0,route_id,route_name,agency,percentage_verizon,percentage_att,_merge
0,2596,"Carnelian Bay Tahoe Vista, Kings Beach, Crystal Bay, Incline Village",Tahoe Transportation,11.7,11.7,both
24,1094,Sonora HWY 120,Yosemite Area Regional Transportation System,60.59,54.42,both
25,251,Foster City - Hillsdale Mall,SamTrans,61.06,61.06,both
26,180,Lewiston Line,Trinity Transit,61.51,63.16,both
27,2595,"Truckee, Northstar",Tahoe Transportation,62.17,64.77,both
28,580,Merced HWY 140,Yosemite Area Regional Transportation System,65.09,51.45,both
29,e3c2cf68-8f7e-4492-9646-e9a43cf2cf30,Rt 9. Transit Center to PVH via Alvin Ave.,Santa Maria Area Transit,65.18,65.93,both
30,3639,Piru Loop,Ventura County Transportation Commission,65.64,65.64,both
31,16672,Grass Valley to North San Juan,Gold Country Stage,65.85,65.5,both
32,705-240,Oakland - San Francisco,Golden Gate Bridge Highway and Transportation District,66.36,68.28,both
