## Analysis

In [1]:
import A3_analysis
import A2_other
import A1_provider_prep

import pandas as pd
import geopandas as gpd
from calitp.sql import to_snakecase
from shared_utils import geography_utils, utils



In [2]:
# Notebook-wide display settings: up to 100 columns, no row limit,
# untruncated cell text, and floats formatted with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
# Verizon no-coverage map, stored under the provider-prep GCS folder.
verizon_path = f"{A1_provider_prep.GCS_FILE_PATH}verizon_no_coverage_cal.parquet"
verizon = gpd.read_parquet(verizon_path)

In [4]:
# AT&T no-coverage map, stored under the provider-prep GCS folder.
att_path = f"{A1_provider_prep.GCS_FILE_PATH}att_no_coverage_cal.parquet"
att = gpd.read_parquet(att_path)

In [5]:
# T-Mobile no-coverage map, stored under the provider-prep GCS folder.
tmobile_path = f"{A1_provider_prep.GCS_FILE_PATH}tmobile_no_coverage_cal.parquet"
tmobile = gpd.read_parquet(tmobile_path)

In [6]:
# Original (unprocessed) AT&T provider map from the FCC, read from a local file
# next to this notebook.
att_og_path = "./ATT.parquet"
att_og = gpd.read_parquet(att_og_path)

In [7]:
# att_og.plot()

In [8]:
# att.plot()

### Which routes touch areas without data coverage among all 3 providers?

In [9]:
# Routes that run through areas without data coverage across all 3 providers
# (AT&T, T-Mobile, Verizon). The merge logic lives in A3_analysis; the cells
# below rely on the result having a `median_percent_with_coverage` column.
routes1 = A3_analysis.merge_all_providers()

In [10]:
# Load the original route geometries. Only `all_routes` is used below (for the
# total route count); the single-/multi-district splits are returned alongside it.
one_dist_routes, multi_dist_routes, all_routes = A2_other.find_multi_district_routes()

In [11]:
# Bin each route by the median percentage of the route WITH coverage,
# in 10-point steps from 0 to 100.
bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
# pd.cut bins are right-closed by default, so the first bin (0, 10] would
# exclude a route with exactly 0% coverage and label it NaN, dropping it from
# the counts. include_lowest=True closes the first interval on the left.
routes1["binned"] = pd.cut(
    routes1["median_percent_with_coverage"], bins, include_lowest=True
)

In [12]:
# Summary: flagged routes versus the total number of routes.
(
    f"There are {len(routes1)} routes that cross a zone w/o data coverage "
    f"among ATT, T-Mobile, and Verizon out of {len(all_routes)} routes."
)

'There are 226 routes that cross a zone w/o data coverage among ATT, T-Mobile, and Verizon out of 2914 routes.'

### Filter for low coverage routes
* Most routes (140 out of 226) have data coverage for 90-100% of their length.
* Cut off based on bins.

In [13]:
# Number of routes falling in each coverage bin.
routes1["binned"].value_counts()

(90, 100]    140
(80, 90]      38
(70, 80]      29
(60, 70]       9
(50, 60]       6
(30, 40]       2
(40, 50]       2
(0, 10]        0
(10, 20]       0
(20, 30]       0
Name: binned, dtype: int64

In [14]:
# Cutoff (in percent): routes whose `median_percent_with_coverage` is below this
# value are treated as low coverage by the filter in the next cell.
threshold = 75

In [15]:
# Keep only routes whose median percent WITH coverage is under the threshold,
# then renumber the rows from zero.
is_low_coverage = routes1["median_percent_with_coverage"] < threshold
low_coverage = routes1.loc[is_low_coverage].reset_index(drop=True)

In [16]:
# Report how many routes passed the low-coverage filter.
n_low_coverage = len(low_coverage)
f"{n_low_coverage} routes are considered low-data coverage"

'30 routes are considered low-data coverage'

### How many buses run through a route that has "low data coverage"?
* Find the number of trips run on each of these "low coverage" routes.
* Find the number of buses owned by each agency in this "low coverage" dataframe.

In [17]:
# Build the final summary table from the low-coverage routes. The merge logic
# lives in A3_analysis; the resulting columns (trips per route/agency, estimated
# buses in low cell zones, etc.) are shown in the table displayed below.
final = A3_analysis.final_merge(low_coverage)

### Conclusions
* TODO: Replace district codes (e.g. D-1) with full district names.

In [18]:
# Display the summary table without the geometry and bin columns.
final.drop(["Geometry", "Binned"], axis="columns")

Unnamed: 0,Agency,Long Route Name,District,Median Percent With Coverage,Median Percent No Coverage,Total Trips By Route,Total Trips By Agency,Percentage Of Trips W Low Cell Service,Estimate Of Buses In Low Cell Zones
0,Trinity Transit,Route travels through all down-river communities between Weaverville and Willow Creek. Key route to the coast; connects with RTS service to Arcata/Eureka. 181 Trinity Transit,"D-1,D-1,D-2",36.0,64.0,4,17,0,1
1,AC Transit,Sacramento - Christie Transbay j AC Transit,D-4,74.0,26.0,8,5587,0,1
2,AC Transit,Piedmont - Oakland Ave. Transbay p AC Transit,D-4,71.0,29.0,12,5587,0,1
3,Arcata and Mad River Transit System,The Willow Creek/Arcata Route is operated by Humboldt Transit Authority and travels along Trinity Highway between the communities of Arcata and Willow Creek. 8 Arcata and Mad River Transit System,D-1,74.0,26.0,5,154,0,1
4,Blue Lake Rancheria,The Willow Creek/Arcata Route is operated by Humboldt Transit Authority and travels along Trinity Highway between the communities of Arcata and Willow Creek. 8 Blue Lake Rancheria,D-1,74.0,26.0,5,154,0,1
5,Capitol Corridor,Shuttle to San Francisco Transbay Terminal sf Capitol Corridor,D-4,60.0,40.0,18,45,0,11
6,Eastern Sierra Transit Authority,Bridgeport-Gardnerville 566 Eastern Sierra Transit Authority,D-9,71.0,29.0,2,122,0,1
7,Eureka Transit Service,The Willow Creek/Arcata Route is operated by Humboldt Transit Authority and travels along Trinity Highway between the communities of Arcata and Willow Creek. 8 Eureka Transit Service,D-1,74.0,26.0,5,154,0,1
8,Gold Country Stage,Grass Valley to North San Juan 16672 Gold Country Stage,D-3,68.0,32.0,10,113,0,2
9,Golden Gate Bridge Highway and Transportation District,Del Norte BART Station - San Rafael 580 Golden Gate Bridge Highway and Transportation District,D-4,72.0,28.0,27,264,0,16


In [19]:
# Total of the per-row bus estimates across the low-coverage table.
estimated_buses = final["Estimate Of Buses In Low Cell Zones"].sum()
f"Around {estimated_buses} buses run through low data coverage routes."

'Around 88 buses run through low data coverage routes.'

In [20]:
# Count distinct route names in the low-coverage table.
# The message was missing its verb ("There 30 low ..."); it now reads "There are".
f"There are {final['Long Route Name'].nunique()} low data coverage routes."

'There 30 low data coverage routes.'

In [21]:
# Number of low-coverage rows per district value.
final["District"].value_counts()

D-4            8
D-1            7
D-6            3
D-2            3
D-9            2
D-5            2
D-10           2
D-1,D-1,D-2    1
D-3            1
D-7            1
Name: District, dtype: int64

In [22]:
# Number of low-coverage rows per agency.
final["Agency"].value_counts()

Trinity Transit                                           4
Golden Gate Bridge Highway and Transportation District    3
Santa Cruz Metropolitan Transit District                  2
Redwood Coast Transit                                     2
MUNI                                                      2
AC Transit                                                2
Yosemite Area Regional Transportation System              2
Eastern Sierra Transit Authority                          2
Gold Country Stage                                        1
Eureka Transit Service                                    1
Kern Transit                                              1
Los Angeles Department of Transportation                  1
Capitol Corridor                                          1
Mendocino Transit Authority                               1
Blue Lake Rancheria                                       1
Arcata and Mad River Transit System                       1
Tulare County Area Transit              

#### Map

In [23]:
# Grab the names of routes that are "low coverage."
# low_coverage_routes = low_coverage.long_route_name.unique().tolist()

In [24]:
# Get original geometry
#low_coverage_og_geometry = (
#    all_routes[all_routes["long_route_name"].isin(low_coverage_routes)]
#).reset_index(drop=True)

In [25]:
# Base interactive map: the T-Mobile no-coverage layer on a light basemap.
tmobile_map = tmobile.explore(tiles="CartoDB positron", width=800, height=500)

In [26]:
# Add the low-coverage routes to the existing T-Mobile map, using the
# "Long Route Name" column for the layer and hiding the legend.
tmobile_map = final.explore(
    "Long Route Name",
    m=tmobile_map,
    legend=False,
)

# tmobile_map