# Analysis
* Create yml for all my files. 
* Save overlay? 

In [1]:
# Read in zip files
# Graphs
import altair as alt
import fsspec
import geopandas as gpd
import intake
import numpy as np
import pandas as pd

# My utilities
import utilities
from calitp import *

# Display
from IPython.display import HTML, Image, Markdown, display, display_html

# Geometry
from shared_utils import geography_utils, utils



In [2]:
# Notebook-wide pandas display settings: show up to 100 columns, format floats
# with two decimals, and never truncate rows or the text inside a cell.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [3]:
# Google Cloud Storage folder for the cellular-coverage analysis.
# NOTE(review): this constant is not referenced in any of the visible cells.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/cellular_coverage/"

In [4]:
# Load the routes table through the project helper. It is passed as the second
# argument to `route_cell_coverage` for both carriers below.
# NOTE(review): presumably one row per unique route — confirm in `utilities`.
routes_df = utilities.load_unique_routes_df()

## AT&T

In [5]:
# Load the AT&T coverage data through the project helper (schema is defined in
# `utilities`, not visible here).
att_df = utilities.load_att()

In [6]:
# Combine the AT&T coverage data with the routes table. The result carries the
# `percentage` and `binned` columns that later cells read.
att_m1 = utilities.route_cell_coverage(att_df, routes_df)

In [7]:
# Number of routes falling in each AT&T coverage bin.
att_m1["binned"].value_counts()

(90, 100]    2069
(80, 90]      139
(70, 80]       75
(60, 70]       44
(50, 60]       38
(10, 20]        4
(20, 30]        2
(30, 40]        2
(40, 50]        2
(0, 10]         0
Name: binned, dtype: int64

## Verizon

In [8]:
# Load the Verizon coverage data through the project helper (schema is defined
# in `utilities`, not visible here).
verizon_df = utilities.load_verizon()

In [9]:
# Combine the Verizon coverage data with the routes table, mirroring the AT&T
# step. The result carries the `percentage` and `binned` columns read later.
verizon_m1 = utilities.route_cell_coverage(verizon_df, routes_df)

In [10]:
# Number of routes falling in each Verizon coverage bin.
verizon_m1["binned"].value_counts()

(90, 100]    2414
(80, 90]      168
(70, 80]       74
(60, 70]       38
(50, 60]       29
(40, 50]        3
(10, 20]        2
(30, 40]        2
(0, 10]         0
(20, 30]        0
Name: binned, dtype: int64

## Compare routes in the 2 data sets

In [11]:
# Coverage cutoff, in percent: routes strictly below it count as "low coverage".
# NOTE(review): the original note asked "maybe <=60%?", but `percentage` is
# fractional, so `< 61` also admits values such as 60.5. Confirm whether
# `<= 60` was intended before changing it; the counts below assume `< 61`.
LOW_COVERAGE_CUTOFF = 61

# Apply the same cutoff to both carriers so the two subsets are comparable.
low_att_coverage = att_m1.loc[
    att_m1["percentage"] < LOW_COVERAGE_CUTOFF
].reset_index(drop=True)
low_verizon_coverage = verizon_m1.loc[
    verizon_m1["percentage"] < LOW_COVERAGE_CUTOFF
].reset_index(drop=True)

# Repeat of routes w/ same route id & route name run by different agencies?
# low_att_coverage2 = low_att_coverage.sort_values(['percentage', 'agency']).drop_duplicates(subset = ['route_name','route_id']).reset_index(drop = True)

In [12]:
# Outer-join the AT&T and Verizon low-coverage frames on the route/agency keys.
# The `_merge` indicator records whether a route was flagged for both carriers,
# AT&T only (left_only) or Verizon only (right_only).
m2 = pd.merge(
    left=low_att_coverage,
    right=low_verizon_coverage,
    on=["route_id", "route_name", "itp_id", "agency"],
    how="outer",
    suffixes=("_att", "_verizon"),
    indicator=True,
)

In [13]:
# Tally rows by merge outcome: flagged for both carriers, AT&T only
# (left_only), or Verizon only (right_only).
m2["_merge"].value_counts()

both          29
left_only     24
right_only     9
Name: _merge, dtype: int64

In [14]:
# Keep only the routes flagged for both carriers. A left_only/right_only row is
# absent from the other carrier's low-coverage frame (presumably because that
# carrier's coverage is above the cutoff), so it is dropped here.
m3 = m2[m2["_merge"].eq("both")].reset_index(drop=True)

In [15]:
# Spot check: look up one route in the unfiltered Verizon table to see the
# coverage percentage it was assigned.
verizon_m1.loc[
    verizon_m1["route_name"].eq("Route 20 Smith River / Arcata"),
    ["route_name", "agency", "percentage"],
]

Unnamed: 0,route_name,agency,percentage
311,Route 20 Smith River / Arcata,Redwood Coast Transit,77.65


In [16]:
# Remove the merge indicator so the next merge can add its own `_merge` column.
# `errors="ignore"` keeps this cell safe to re-run: once the column is gone a
# second execution is a no-op instead of raising KeyError.
m3 = m3.drop(columns=["_merge"], errors="ignore")

# Add trips

In [17]:
# Load the trips table through the project helper. The next cell joins it to
# the low-coverage routes on `route_id` and `calitp_itp_id`.
trips_df = utilities.load_clean_trips_df()

In [18]:
# Attach trip data to the low-coverage routes. The left join keeps every route;
# the ITP id key is named `itp_id` on the routes side and `calitp_itp_id` on
# the trips side, and `_merge` flags routes that found no trips match.
m4 = pd.merge(
    left=m3,
    right=trips_df,
    left_on=["route_id", "itp_id"],
    right_on=["route_id", "calitp_itp_id"],
    how="left",
    indicator=True,
)

In [19]:
# Check how many low-coverage rows found a trips match (`both`) versus none
# (`left_only`).
m4['_merge'].value_counts()

both          25
left_only      4
right_only     0
Name: _merge, dtype: int64

In [20]:
# Count the distinct route ids left after the trips join.
m4["route_id"].nunique()

17

In [21]:
# Remove the merge indicator so the NTD merge can add its own `_merge` column.
# `errors="ignore"` keeps this cell safe to re-run: once the column is gone a
# second execution is a no-op instead of raising KeyError.
m4 = m4.drop(columns=["_merge"], errors="ignore")

# Add NTD
* Agencies to change
* 'Trinity County': Trinity Transit
* 'City of Calabasas',
* 'County of Sonoma, dba: Sonoma County Transit': 'Sonoma County Transit'
* 'Tehama County', Tehama Rural Area eXpress
* East Los Angeles Shuttle: 'Los Angeles County Department of Public Works - East L.A.',
* 'Sacramento Regional Transit District, dba: Sacramento RT':'Sacramento Regional Transit District'
* 'Eastern Sierra Transit Authority': Mammoth Lakes Transit System
* 'City of Lompoc, dba: Lompoc Transit': City of Lompoc Transit
* 'San Luis Obispo Regional Transit Authority': South County Transit Link
* 'City of Roseville, dba: Roseville Transit': Roseville Transit
* the Link-Athens

In [22]:
# NOTE(review): disabled preview of m4. `_merge` was dropped from m4 in the
# previous cell, so remove it from this column list before re-enabling.
# m4[['route_id', 'route_name', 'agency', 'itp_id','total_trips', 'percentage_att', 'binned_att', 'percentage_verizon', 'binned_verizon','_merge']].sort_values('route_name')

In [23]:
# Load the NTD data through the project helper (presumably National Transit
# Database vehicle records — confirm in `utilities`). This cell only loads it;
# the merge onto the routes by `agency` happens in a later cell.
ntd_df = utilities.ntd_vehicles()

In [29]:
# Alphabetical list of the distinct NTD agency names, used to eyeball how they
# differ from the agency names on the routes side.
ntd_df["agency"].drop_duplicates().sort_values().tolist()

['Access Services',
 'Alameda-Contra Costa Transit District',
 'Alpine County Local Transportation Commission',
 'Amador Regional Transit System',
 'Anaheim Transportation Network',
 'Antelope Valley Transit Authority',
 'Blue Lake Rancheria',
 'Butte County Association of Governments',
 'Calaveras Transit Agency',
 'California Vanpool Authority',
 'Central Contra Costa Transit Authority',
 'City and County of San Francisco',
 'City of Agoura Hills',
 'City of Alhambra',
 'City of Arcadia',
 'City of Arcata',
 'City of Arvin',
 'City of Atascadero',
 'City of Auburn',
 'City of Avalon',
 'City of Azusa',
 'City of Baldwin Park',
 'City of Bell',
 'City of Bell Gardens',
 'City of Bellflower',
 'City of Beverly Hills',
 'City of Burbank',
 'City of Calabasas',
 'City of California City',
 'City of Camarillo',
 'City of Carson',
 'City of Cerritos',
 'City of Chowchilla',
 'City of Claremont',
 'City of Commerce',
 'City of Compton ',
 'City of Corcoran',
 'City of Corona',
 'City of Cov

In [25]:
# Attach NTD data to the low-coverage routes by agency name. The left join
# keeps every route and `_merge` flags routes without an NTD match.
# The NTD agency names contain stray whitespace (e.g. 'City of Compton '), so
# the right-hand key is stripped on a copy before joining; `ntd_df` itself is
# left untouched.
# NOTE(review): most misses come from genuinely different agency names and
# still need an explicit crosswalk (see the "Agencies to change" list).
m5 = pd.merge(
    m4,
    ntd_df.assign(agency=ntd_df["agency"].str.strip()),
    how="left",
    on="agency",
    indicator=True,
)

In [26]:
# Check how many rows matched an NTD agency name (`both`) versus none
# (`left_only`).
m5['_merge'].value_counts()

left_only     24
both           5
right_only     0
Name: _merge, dtype: int64

In [27]:
# Routes/agencies that found no NTD match, one row per (route_id, agency) pair.
(
    m5.loc[m5["_merge"].eq("left_only"), ["route_id", "agency"]]
    .sort_values(by=["route_id", "agency"])
    .drop_duplicates()
)

Unnamed: 0,route_id,agency
0,1042,Cloverdale Transit
2,1292,Susanville Indian Rancheria Public Transportation Program
3,1292,Tehama Rural Area eXpress
4,13054,Avocado Heights/Bassett/West Valinda Shuttle
5,13054,East Los Angeles Shuttle
6,13054,East Valinda Shuttle
7,13054,Sunshine Bus(South Whittier)
8,13054,the Link Florence-Firestone/Walnut Park
9,13054,the Link King Medical Center
10,13054,the Link Lennox


In [28]:
# m5[['route_id', 'route_name', 'agency', 'itp_id','total_trips', 'percentage_att', 'binned_att', 'percentage_verizon', 'binned_verizon','_merge']].sort_values('route_name')