# Exploring Cell Coverage of Routes
* To do later: move data sources to a catalog
* An agency is defined here as a service and an operator.

In [1]:
import numpy as np
import pandas as pd
from calitp import *
from shared_utils import utils

# Geometry
from shared_utils import geography_utils
import contextily as cx
import geopandas as gpd


# Read in zip files
import fsspec

# Display 
from IPython.display import HTML, Image, Markdown, display, display_html



In [2]:
# Notebook-wide pandas display settings: show up to 100 columns, never
# truncate rows or cell contents, and print floats with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
# Base GCS folder for this analysis. File names are appended directly to it
# (see PATH below), so the trailing slash is required.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/cellular_coverage/"

## California County Borders

In [4]:
# County boundary polygons, used further down as the mask when clipping the
# coverage layer.
CA_COUNTIES_URL = (
    "https://opendata.arcgis.com/datasets/8713ced9b78a4abb97dc130a691a8695_0.geojson"
)
ca_gdf = gpd.read_file(CA_COUNTIES_URL)

## FCC AT&T Data Map 
* Testing with AT&T first.

In [5]:
# Zipped AT&T LTE data file stored under the analysis folder on GCS.
PATH = f"{GCS_FILE_PATH}ATT_Mobility_LTE_Data.zip"

In [6]:
# Open the zip through fsspec (PATH is a gs:// URL) and let geopandas read
# the layer straight from the open file handle.
with fsspec.open(PATH) as zipped_layer:
    fcc_gdf = gpd.read_file(zipped_layer)

In [7]:
# Size of the AT&T layer before clipping to California.
fcc_gdf.shape

(582, 5)

### Clip AT&T map to only include California Counties

In [8]:
# Check that they are the same CRS. This only displays the comparison; it
# does not stop the notebook if the two CRSs differ.
fcc_gdf.crs == ca_gdf.crs

True

In [9]:
# Keep only the parts of the AT&T layer that fall within the county polygons.
fcc_ca_gdf = gpd.clip(fcc_gdf, ca_gdf)

In [10]:
# Check that it worked: the clipped layer should have fewer rows than the
# unclipped one.
fcc_ca_gdf.shape

(36, 5)

In [11]:
# fcc_ca_gdf.plot(figsize =(40, 20))

## Trip Routes 

In [12]:
# Transit route geometries (ca_transit_routes), read as a GeoDataFrame.
routes_df = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/traffic_ops/ca_transit_routes.parquet")

In [13]:
# What does route type mean?
# NOTE(review): presumably this is the GTFS `route_type` code from routes.txt
# — confirm. In the GTFS spec: 0 = tram/streetcar/light rail, 1 = subway/metro,
# 2 = rail, 3 = bus, 4 = ferry, 5 = cable tram.
routes_df['route_type'].value_counts()

3    7397
2     183
0     119
4      57
5      31
1       4
Name: route_type, dtype: int64

In [14]:
# Attribute (non-geometry) columns, handy for previewing tables without the
# geometry column.
cols_without_geometry = [
    "agency",
    "route_name",
    "itp_id",
    "route_id",
    "shape_id",
    "route_type",
]

In [15]:
# Exclude every row whose agency is Amtrak (rows with a missing agency stay).
is_amtrak = routes_df["agency"] == "Amtrak"
routes_df2 = routes_df.loc[~is_amtrak]

In [17]:
# Exclude San Francisco Bay Ferry, which only runs ferry service.
is_sf_bay_ferry = routes_df2["agency"] == "San Francisco Bay Ferry"
routes_df2 = routes_df2.loc[~is_sf_bay_ferry]

In [None]:
# Preview (not drop) the remaining routes whose name mentions "ferry", so they
# can be double-checked before being removed - DOUBLE CHECK THESE REALLY ARE
# FERRY ROUTES.
# `na=False` treats missing route names as non-matches. Without it the mask
# holds NaN for those rows and boolean indexing raises a ValueError (missing
# route names are only filled in a later cell).
# NOTE(review): if route_type follows the GTFS spec, 4 means ferry, so
# `routes_df2.route_type == 4` may be a more reliable check — confirm.
routes_df2[routes_df2.route_name.str.contains("Ferry", case=False, na=False)]

In [None]:
# Row count of the filtered routes table vs. the full routes table.
f'{len(routes_df2)} rows left after dropping - compared to {len(routes_df)} rows before.'

In [None]:
# Fill in NA for route names and agency names with the string 'None'.
# routes_df2 was produced by boolean filtering, so assigning columns into it
# in place can trigger pandas' SettingWithCopyWarning. fillna with a
# per-column dict returns a new frame and leaves every other column untouched.
routes_df2 = routes_df2.fillna({"agency": "None", "route_name": "None"})

In [None]:
# Compare the number of distinct route ids, route names and shape ids.
f'{routes_df2.route_id.nunique()} unique route ids, {routes_df2.route_name.nunique()} different names, and {routes_df2.shape_id.nunique()} different shape ids.'

In [None]:
# A route is identified by ITP ID + route ID + route type; keep the first row
# for each such combination.
route_key = ["itp_id", "route_id", "route_type"]
routes_df3 = routes_df2.drop_duplicates(subset=route_key)

In [None]:
# Reset index after dropping.
# Note: without drop=True, reset_index() keeps the old index as a new `index`
# column, which is then carried into everything derived from routes_df3.
routes_df3 = routes_df3.reset_index()

In [None]:
# Size of the routes table after de-duplication.
routes_df3.shape

## Overlay 
* I want to obtain routes that are NOT contained in the AT&T coverage map.
* https://geopandas.org/en/stable/docs/user_guide/set_operations.html

In [None]:
# Check that they are the same CRS before overlaying. This only displays the
# comparison; it does not stop the notebook if the two CRSs differ.
fcc_ca_gdf.crs == routes_df3.crs

In [None]:
# how='difference' keeps the parts of each route geometry that lie outside the
# AT&T polygons in fcc_ca_gdf.
overlay_df = routes_df3.overlay(fcc_ca_gdf, how='difference')

In [None]:
# Size of the difference result.
overlay_df.shape

In [None]:
# Map: reproject to Web Mercator (EPSG:3857) so the layer can be drawn over
# the contextily basemap in the next cell.
overlay_df_map = overlay_df.to_crs(epsg=3857)

In [None]:
# Change to overlay later.
# Draw the reprojected layer first, then add the basemap tiles to the same axes.
ax = overlay_df_map.plot(figsize=(16, 12), markersize=30)
cx.add_basemap(ax)

In [None]:
# Headline numbers: distinct route ids left after the overlay vs. distinct
# route ids in routes_df2 (the table with Amtrak and SF Bay Ferry removed).
uncovered_route_count = overlay_df["route_id"].nunique()
filtered_route_count = routes_df2.route_id.nunique()
display(Markdown(f"""There are {uncovered_route_count} routes w/o AT&T coverage, compared to {filtered_route_count}
total routes in the original dataframe.
"""))

In [None]:
# Previewing the routes left
# overlay_df[cols_without_geometry].sort_values('route_name')

In [None]:
# Top 10 agencies ranked by number of distinct route ids in the overlay result.
routes_by_agency = overlay_df.groupby(["agency"]).agg({"route_id": "nunique"})
routes_by_agency = routes_by_agency.rename(columns={"route_id": "total_routes"})
routes_by_agency.sort_values("total_routes", ascending=False).head(10)

## Trip Stops

In [None]:
# Transit stops (ca_transit_stops), read as a GeoDataFrame.
stops_df = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/traffic_ops/ca_transit_stops.parquet")

In [None]:
# Preview the first two rows to see which columns the stops table has.
stops_df.head(2)

In [None]:
# Size of the stops table before filtering.
stops_df.shape

In [None]:
# Distinct route ids present in overlay_df, as a plain list, used to filter
# the stops table.
uncovered_route_ids = overlay_df["route_id"].unique()
route_id_without_cell = uncovered_route_ids.tolist()

In [None]:
# Keep only stops whose route_id appears in the overlay result.
# NOTE(review): routes were de-duplicated on itp_id + route_id + route_type,
# which suggests route_id alone is not unique across agencies. Matching on
# route_id only may therefore pull in stops from unrelated agencies — confirm
# and consider matching on the agency identifier as well.
stops_df2 = stops_df[stops_df["route_id"].isin(route_id_without_cell)]

In [None]:
# Compare how many of the overlay route ids actually appear in the stops data.
f'{stops_df2.route_id.nunique()} route ids in trip stops dataframe compared to {overlay_df.route_id.nunique()} trip routes dataframe above...strange.'

In [None]:
# Which route ids from the overlay result never appear in the stops table?
trips_df_id = set(stops_df["route_id"].unique().tolist())
routes_df_id = set(route_id_without_cell)
routes_df_id.difference(trips_df_id)

In [None]:
# Number of stop rows (non-null stop_id) per agency and route name.
stop_counts = stops_df2.groupby(["agency", "route_name"]).agg({"stop_id": "count"})
total_stops_df = stop_counts.reset_index().rename(columns={"stop_id": "total_stops"})

In [None]:
# Number of agency / route-name combinations in the stop-count table.
len(total_stops_df)

In [None]:
# total_stops_df