# Analysis
* TODO: Create a yml for all my files.
* TODO: Save the overlay?

In [1]:
# Read in zip files
# Graphs
import altair as alt
import fsspec
import geopandas as gpd
import intake
import numpy as np
import pandas as pd

# My utilities
import utilities
from calitp import *

# Display
from IPython.display import HTML, Image, Markdown, display, display_html

# Geometry
from shared_utils import geography_utils, utils



In [2]:
# Notebook-wide pandas display settings: show up to 100 columns, never truncate
# rows or cell text, and print floats with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [3]:
# Base GCS folder for this analysis. File names are appended directly to it,
# so the trailing slash must stay.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/cellular_coverage/"

In [4]:
# Load the routes table through the project helper
# (presumably one row per unique route - confirm in utilities).
routes_df = utilities.load_unique_routes_df()

In [5]:
# Sanity check: number of rows and columns in the routes table.
routes_df.shape

(2829, 7)

## T-Mobile

In [6]:
# Read the T-Mobile California parquet file from the analysis bucket with geopandas.
tmobile_df = gpd.read_parquet(f"{GCS_FILE_PATH}tmobile_california.parquet")

In [7]:
# Run the project coverage helper on the T-Mobile layer and the routes.
# The third argument is the suffix that ends up on every output column name
# (the result's columns all end in `_tmobile`).
tmobile_m1 = utilities.route_cell_coverage(tmobile_df, routes_df, "_tmobile")

In [8]:
# Size of the T-Mobile result, to compare with the number of routes loaded.
tmobile_m1.shape

(2828, 10)

In [12]:
# List the suffixed column names produced by the coverage helper.
tmobile_m1.columns

Index(['route_id_tmobile', 'route_name_tmobile', 'agency_tmobile',
       'itp_id_tmobile', 'route_length_overlay_tmobile', 'geometry_tmobile',
       'route_type_tmobile', 'route_length_original_df_tmobile',
       'percentage_tmobile', 'binned_tmobile'],
      dtype='object')

In [13]:
# Count T-Mobile routes per coverage-percentage bin.
tmobile_m1["binned_tmobile"].value_counts()

(90, 100]    2030
(80, 90]      173
(70, 80]       87
(60, 70]       37
(50, 60]       21
(40, 50]       11
(30, 40]        8
(20, 30]        4
(10, 20]        1
(0, 10]         0
Name: binned_tmobile, dtype: int64

## AT&T

In [14]:
# Load the AT&T layer through the project helper.
att_df = utilities.load_att()

In [15]:
# Same coverage helper as for T-Mobile, applied to the AT&T layer;
# output columns carry the `_att` suffix.
att_m1 = utilities.route_cell_coverage(att_df, routes_df, "_att")

In [16]:
# Compare the size of the AT&T input layer with the size of the per-route result.
att_df.shape, att_m1.shape

((36, 1), (2822, 10))

In [17]:
# Count AT&T routes per coverage-percentage bin.
att_m1["binned_att"].value_counts()

(90, 100]    2069
(80, 90]      139
(70, 80]       75
(60, 70]       44
(50, 60]       38
(10, 20]        4
(20, 30]        2
(30, 40]        2
(40, 50]        2
(0, 10]         0
Name: binned_att, dtype: int64

## Verizon

In [18]:
# Load the Verizon layer through the project helper.
verizon_df = utilities.load_verizon()

In [19]:
# Same coverage helper as for the other providers, applied to the Verizon layer;
# output columns carry the `_verizon` suffix.
verizon_m1 = utilities.route_cell_coverage(verizon_df, routes_df, "_verizon")

In [20]:
# Compare the size of the Verizon input layer with the size of the per-route result.
verizon_df.shape, verizon_m1.shape

((74, 1), (2822, 10))

In [21]:
# Count Verizon routes per coverage-percentage bin.
verizon_m1["binned_verizon"].value_counts()

(90, 100]    2414
(80, 90]      168
(70, 80]       74
(60, 70]       38
(50, 60]       29
(40, 50]        3
(10, 20]        2
(30, 40]        2
(0, 10]         0
(20, 30]        0
Name: binned_verizon, dtype: int64

## Compare routes across providers.

In [321]:
# Coverage cutoff in percent: routes whose percentage is strictly below this value
# are treated as low coverage (the filtering cell compares with `<`).
# NOTE(review): the name is misspelled ("theshold"); the filtering cell references it
# under this spelling, so rename both together.
theshold = 61

In [322]:
# For each provider, keep the routes whose coverage percentage is strictly below
# the cutoff and renumber the rows from zero.
low_att_coverage = att_m1.loc[
    lambda df: df["percentage_att"] < theshold
].reset_index(drop=True)
low_verizon_coverage = verizon_m1.loc[
    lambda df: df["percentage_verizon"] < theshold
].reset_index(drop=True)
low_tmobile_coverage = tmobile_m1.loc[
    lambda df: df["percentage_tmobile"] < theshold
].reset_index(drop=True)

In [323]:
# Number of low-coverage routes (rows) per provider: AT&T, Verizon, T-Mobile.
(
    low_att_coverage.shape,
    low_verizon_coverage.shape,
    low_tmobile_coverage.shape,
)

((53, 10), (38, 10), (47, 10))

In [360]:
# Check whether the filtered frame is still a GeoDataFrame or has become a
# plain pandas DataFrame.
type(low_att_coverage)

pandas.core.frame.DataFrame

### Merge Verizon & ATT

In [324]:
# Outer-join the low-coverage AT&T and Verizon routes on route id, route name,
# agency and ITP id. `indicator=True` adds a `_merge` column that records whether
# each row was found in both frames or in only one of them.
m1 = pd.merge(
    left=low_att_coverage,
    right=low_verizon_coverage,
    left_on=["route_id_att", "route_name_att", "agency_att", "itp_id_att"],
    right_on=[
        "route_id_verizon",
        "route_name_verizon",
        "agency_verizon",
        "itp_id_verizon",
    ],
    how="outer",
    indicator=True,
)

In [325]:
# How many routes are low for both providers vs. only AT&T (left) or only Verizon (right).
m1["_merge"].value_counts()

both          29
left_only     24
right_only     9
Name: _merge, dtype: int64

#### Check the Verizon coverage of routes that are low for AT&T only (left_only)

In [326]:
# Spot-check: Verizon coverage percentage for the "Ridgecrest Shuttle" route.
verizon_m1.loc[
    verizon_m1["route_name_verizon"] == "Ridgecrest Shuttle",
    ["route_name_verizon", "percentage_verizon"],
]

Unnamed: 0,route_name_verizon,percentage_verizon
246,Ridgecrest Shuttle,99.99


In [327]:
# Spot-check: Verizon coverage percentage for the "Mainline AM/PM" route.
verizon_m1.loc[
    verizon_m1["route_name_verizon"] == "Mainline AM/PM",
    ["route_name_verizon", "percentage_verizon"],
]

Unnamed: 0,route_name_verizon,percentage_verizon
2719,Mainline AM/PM,72.45
2720,Mainline AM/PM,72.45


In [328]:
# Keep only the rows flagged "both": routes that fall below the coverage cutoff
# for AT&T and for Verizon alike.
m1 = m1[m1["_merge"].eq("both")]

In [329]:
# Remove the indicator column so the next merge can add its own `_merge`.
m1 = m1.drop("_merge", axis="columns")

### Merge m1 with T-Mobile

In [330]:
# Outer-join the AT&T/Verizon low-coverage routes with the low-coverage T-Mobile
# routes on route id, route name, agency and ITP id. `_merge` records where each
# row was found.
m2 = pd.merge(
    left=m1,
    right=low_tmobile_coverage,
    left_on=["route_id_att", "route_name_att", "agency_att", "itp_id_att"],
    right_on=[
        "route_id_tmobile",
        "route_name_tmobile",
        "agency_tmobile",
        "itp_id_tmobile",
    ],
    how="outer",
    indicator=True,
)

In [331]:
# How many routes are low for all three providers ("both") vs. only one side of this merge.
m2["_merge"].value_counts()

right_only    24
both          23
left_only      6
Name: _merge, dtype: int64

In [332]:
# Keep only the rows flagged "both": routes that fall below the coverage cutoff
# for all three providers.
m3 = m2[m2["_merge"].eq("both")]

In [333]:
# Spot-check: Verizon coverage percentage for "The Coaster" route.
verizon_m1.loc[
    verizon_m1["route_name_verizon"] == "The Coaster",
    ["route_name_verizon", "percentage_verizon"],
]

Unnamed: 0,route_name_verizon,percentage_verizon
1038,The Coaster,86.87


In [334]:
# Remove the indicator column so the next merge can add its own `_merge`.
m3 = m3.drop("_merge", axis="columns")

In [359]:
# Confirm the type of the merged frame (plain DataFrame vs. GeoDataFrame).
type(m3)

pandas.core.frame.DataFrame

# Add trips

In [335]:
# Load the trips table through the project helper; it is joined to the routes on
# `route_id` and `calitp_itp_id` in the next step.
trips_df = utilities.load_clean_trips_df()

In [336]:
# Left-join the trips table onto the routes that are low for all three providers,
# matching on route id and ITP id. Routes without a trips record are kept, and
# `_merge` shows which rows found a match.
m4 = pd.merge(
    left=m3,
    right=trips_df,
    left_on=["route_id_att", "itp_id_att"],
    right_on=["route_id", "calitp_itp_id"],
    how="left",
    indicator=True,
)

In [337]:
# How many low-coverage routes found a trips record ("both") vs. none ("left_only").
m4["_merge"].value_counts()

both          19
left_only      4
right_only     0
Name: _merge, dtype: int64

In [338]:
# Number of distinct route ids that came from the trips table
# (missing values are not counted).
m4["route_id"].nunique()

10

In [339]:
# Remove the indicator column so the next merge can add its own `_merge`.
m4 = m4.drop("_merge", axis="columns")

In [349]:
# List every column now present: the three providers' suffixed columns plus the trips columns.
m4.columns

Index(['route_id_att', 'route_name_att', 'agency_att', 'itp_id_att',
       'route_length_overlay_att', 'geometry_att', 'route_type_att',
       'route_length_original_df_att', 'percentage_att', 'binned_att',
       'route_id_verizon', 'route_name_verizon', 'agency_verizon',
       'itp_id_verizon', 'route_length_overlay_verizon', 'geometry_verizon',
       'route_type_verizon', 'route_length_original_df_verizon',
       'percentage_verizon', 'binned_verizon', 'route_id_tmobile',
       'route_name_tmobile', 'agency_tmobile', 'itp_id_tmobile',
       'route_length_overlay_tmobile', 'geometry_tmobile',
       'route_type_tmobile', 'route_length_original_df_tmobile',
       'percentage_tmobile', 'binned_tmobile', 'calitp_itp_id', 'route_id',
       'total_trips'],
      dtype='object')

# Add NTD

In [340]:
# m4["agency_att"].sort_values().unique()

In [341]:
# Load the NTD vehicles table through the project helper; it is joined to the
# routes by agency name further down.
ntd_df = utilities.ntd_vehicles()

In [342]:
# ntd_df["agency"].sort_values().unique().tolist()

In [343]:
# Rename NTD agencies to the agency names used in m4 so that the two tables can be
# joined on agency name. Names not listed here are left unchanged.
ntd_df["agency"] = ntd_df["agency"].replace(
    to_replace={
        "Trinity County": "Trinity Transit",
        "City of Calabasas": "Calabasas Transit System",
        "County of Sonoma": "Sonoma County Transit",
        "Tehama County": "Tehama Rural Area eXpress",
        "Los Angeles County Department of Public Works - East L.A.": "East Los Angeles Shuttle",
        # Identity entry: the name is already the same on both sides.
        "Sacramento Regional Transit District": "Sacramento Regional Transit District",
        "Eastern Sierra Transit Authority": "Mammoth Lakes Transit System",
        "City of Lompoc": "City of Lompoc Transit",
        "San Luis Obispo Regional Transit Authority": "South County Transit Link",
        "City of Roseville": "Roseville Transit",
        "Los Angeles County Dept. of Public Works - Athens Shuttle Service": "the Link-Athens",
        "Los Angeles County Department of Public Works - Avocado Heights": "Avocado Heights/Bassett/West Valinda Shuttle",
        "Susanville Indian Rancheria": "Susanville Indian Rancheria Public Transportation Program",
    }
)

In [344]:
# Relabel "Cloverdale Transit" as "Sonoma County Transit" in m4 so that it matches
# the agency name used in the NTD table.
m4["agency_att"] = m4["agency_att"].replace(
    to_replace="Cloverdale Transit", value="Sonoma County Transit"
)

In [345]:
# Left-join the NTD table onto the routes by agency name. Routes whose agency has
# no NTD match are kept, and `_merge` shows which rows found a match.
m5 = pd.merge(
    left=m4,
    right=ntd_df,
    left_on="agency_att",
    right_on="agency",
    how="left",
    indicator=True,
)

In [346]:
# How many routes found an NTD agency match ("both") vs. none ("left_only").
m5["_merge"].value_counts()

both          17
left_only      6
right_only     0
Name: _merge, dtype: int64

In [352]:
# Columns shown in the final summary table.
subset_cols = [
    # Route identifiers (the AT&T-suffixed copies of the merge keys)
    "route_id_att",
    "route_name_att",
    "agency_att",
    "itp_id_att",
    # Coverage percentage per provider
    "percentage_att",
    "percentage_verizon",
    "percentage_tmobile",
    # Bus and trip counts joined in from the NTD and trips tables
    "total_buses",
    "total_trips",
]

# Final

In [353]:
# Final summary: one row per route, restricted to the summary columns.
# A route is identified by its route id together with the operator's ITP id (the
# same pair used to join the trips table), because a route id on its own can repeat
# across operators and de-duplicating on it alone could drop a distinct route.
# `drop=True` discards the old index instead of adding it as an extra column.
(
    m5.drop_duplicates(subset=["route_id_att", "itp_id_att"])
    .reset_index(drop=True)[subset_cols]
)

Unnamed: 0,route_id_att,route_name_att,agency_att,itp_id_att,percentage_att,percentage_verizon,percentage_tmobile,total_buses,total_trips
0,1042,"Guerneville, Monte Rio",Sonoma County Transit,70.0,13.43,50.68,45.53,77.0,8.0
1,1094,Sonora HWY 120,Yosemite Area Regional Transportation System,374.0,54.42,60.59,53.88,10.0,
2,1292,,Susanville Indian Rancheria Public Transportation Program,329.0,57.87,51.67,30.49,4.0,2.0
3,13054,Edmund D. Edelman Children’s Court Shuttle,Avocado Heights/Bassett/West Valinda Shuttle,171.0,53.88,53.87,53.88,4.0,49.0
4,178,Route between Weaverville and Hayfork. Connects to Redding Line in Douglas City.,Trinity Transit,344.0,22.69,52.58,38.99,7.0,8.0
5,181,Route travels through all down-river communities between Weaverville and Willow Creek. Key route to the coast; connects with RTS service to Arcata/Eureka.,Trinity Transit,344.0,29.83,37.08,35.12,7.0,4.0
6,225,South Coast / Ukiah,Mendocino Transit Authority,198.0,51.24,51.59,49.86,35.0,4.0
7,28,"Guerneville, Monte Rio",Sonoma County Transit,314.0,13.43,50.68,45.53,77.0,4.0
8,30,FSL,Sacramento Regional Transit District,273.0,34.93,34.93,34.93,295.0,19.0
9,582,Mammoth Lakes HWY 120E/395,Yosemite Area Regional Transportation System,374.0,45.81,51.33,40.59,10.0,
