# Exploring Cell Coverage of Routes
* To do later: move data sources to a catalog
* An agency here is defined as a service and an operator...

In [None]:
# Read in zip files
import fsspec
import geopandas as gpd
import intake
import numpy as np
import pandas as pd

# My utilities
import utilities
from calitp import *

# Display
from IPython.display import HTML, Image, Markdown, display, display_html

# Geometry
from shared_utils import geography_utils, utils

In [None]:
# Notebook display settings: up to 100 columns, floats rounded to 2 decimals,
# and no truncation of rows or of cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [None]:
# Base GCS prefix (note the trailing slash); file names are appended directly to it.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/cellular_coverage/"

## Trip Routes 

In [None]:
# Non-geometry route attributes.
cols_without_geometry = ["agency", "route_name", "itp_id", "route_id", "route_type"]

In [None]:
# Load the transit routes GeoDataFrame from the traffic_ops folder on GCS.
routes_parquet_path = (
    "gs://calitp-analytics-data/data-analyses/traffic_ops/ca_transit_routes.parquet"
)
routes_df = gpd.read_parquet(routes_parquet_path)

In [None]:
# Run the project helper over the routes frame and keep its result.
# NOTE(review): presumably this de-duplicates routes — confirm in utilities.unique_routes.
routes_df = routes_df.pipe(utilities.unique_routes)

In [None]:
# Replace missing agency / route names with the literal string "NA"
name_columns = ["agency", "route_name"]
routes_df[name_columns] = routes_df[name_columns].fillna("NA")

In [None]:
# Summary counts for routes_df.
# NOTE(review): the "shape ids" figure below is the number of distinct
# route_length values, not a count of a shape id column — confirm this proxy
# is intended.
n_route_ids = routes_df["route_id"].nunique()
n_route_names = routes_df["route_name"].nunique()
n_route_lengths = routes_df["route_length"].nunique()
f"""{n_route_ids} unique route ids, 
{n_route_names} different route names, 
and {n_route_lengths} different shape ids.
The dataframe has {len(routes_df)} rows."""

## FCC Maps

In [None]:
# Disabled cell: the T-Mobile load is wrapped in a bare string so it is not executed.
""" T Mobile not working
tmobile_df = utilities.create_california_coverage(
    "T_Mobile_LTE_Data.zip", "tmobile_ca_only"
)
"""

In [None]:
# Verizon coverage layer, read from the shared GCS folder.
verizon_df = gpd.read_parquet(GCS_FILE_PATH + "verizon_ca_only.parquet")

In [None]:
# AT&T coverage layer, read from the shared GCS folder.
att_df = gpd.read_parquet(GCS_FILE_PATH + "att_ca_only.parquet")

In [None]:
# Quick visual check of the AT&T and Verizon coverage layers.
att_df.plot(), verizon_df.plot()

## Overlay 

In [None]:
# Combine the routes with the Verizon coverage layer using the project helper.
# NOTE(review): presumably a spatial overlay that keeps the covered portion of
# each route — confirm against utilities.comparison.
overlay_verizon = utilities.comparison(routes_df, verizon_df)

In [None]:
# Combine the routes with the AT&T coverage layer using the project helper.
# NOTE(review): presumably a spatial overlay that keeps the covered portion of
# each route — confirm against utilities.comparison.
overlay_att = utilities.comparison(routes_df, att_df)

In [None]:
# Verizon overlay summary: distinct route ids before vs. after the overlay.
# The literal opens with exactly three quotes, so the message does not begin
# with a stray '"' character followed by an empty line.
f"""Verizon: After overlap, there are {overlay_verizon['route_id'].nunique()} route ids compared to 
{routes_df['route_id'].nunique()} in the original dataframe. 
The length of the overlay dataframe is {len(overlay_verizon)}"""

In [None]:
# AT&T overlay summary: distinct route ids before vs. after the overlay.
# The literal opens with exactly three quotes, so the message does not begin
# with a stray '"' character.
f"""AT&T: After overlap, there are {overlay_att['route_id'].nunique()} route ids compared to 
{routes_df['route_id'].nunique()} in the original dataframe. 
The length of the overlay dataframe is {len(overlay_att)}"""

In [None]:
# Plot both overlay results colored by route_length; only the AT&T plot requests a legend.
overlay_att.plot("route_length", legend=True), overlay_verizon.plot("route_length")

### Test finding % with a single route...

In [None]:
# Pick one route to sanity-check the percentage logic.
# "via Civic Center" crosses two counties (San Francisco & Marin).
single_route_overlay = overlay_att[overlay_att["route_name"].eq("via Civic Center")]

In [None]:
# Same route, taken from the original (pre-overlay) routes frame
single_route_og = routes_df[routes_df["route_name"].eq("via Civic Center")]

In [None]:
# Total original route length per agency / route name
single_route_og = (
    single_route_og.groupby(by=["agency", "route_name"])
    .agg(route_length=("route_length", "sum"))
    .reset_index()
)

In [None]:
# Total overlay route length per agency / route name
single_route_overlay = (
    single_route_overlay.groupby(by=["agency", "route_name"])
    .agg(route_length=("route_length", "sum"))
    .reset_index()
)

In [None]:
# Outer-join original and overlay totals; the two route_length columns are
# disambiguated with the _original / _overlay suffixes.
single_merge = single_route_og.merge(
    single_route_overlay,
    on=["agency", "route_name"],
    how="outer",
    suffixes=("_original", "_overlay"),
)

In [None]:
# Overlay length divided by original length.
# Note: this is stored as a plain ratio (it is not multiplied by 100 here).
single_merge["overlap_percentage"] = (
    single_merge["route_length_overlay"]
    .div(single_merge["route_length_original"])
    .astype("float64")
)

In [None]:
# Display the single-route comparison.
single_merge

### Test with AT&T

In [None]:
# Total original route length per agency / route id / route name
routes_agg = (
    routes_df.groupby(by=["agency", "route_id", "route_name"])
    .agg(route_length=("route_length", "sum"))
    .reset_index()
)

In [None]:
# Total AT&T overlay route length per agency / route id / route name
att_agg = (
    overlay_att.groupby(by=["agency", "route_id", "route_name"])
    .agg(route_length=("route_length", "sum"))
    .reset_index()
)

In [None]:
# Inner-join original and AT&T overlay totals, so only routes present in both
# frames remain; lengths are suffixed _original / _overlay.
att_merge = routes_agg.merge(
    att_agg,
    on=["agency", "route_id", "route_name"],
    how="inner",
    suffixes=("_original", "_overlay"),
)

In [None]:
# Overlap: overlay length / original length, multiplied by 100.
# The int64 cast drops the fractional part of the percentage.
att_merge["overlap_percentage"] = (
    att_merge["route_length_overlay"]
    .div(att_merge["route_length_original"])
    .mul(100)
    .astype("int64")
)

In [None]:
# Bin edges for overlap: every 10 points from 0 through 100
bins = list(range(0, 101, 10))

In [None]:
# Assign each route's overlap percentage to a bin.
# pd.cut intervals are right-closed by default, so the first bin would be
# (0, 10] and a value of exactly 0 would get NaN and drop out of the counts.
# include_lowest=True makes the first interval include its left edge.
att_merge["binned"] = pd.cut(
    att_merge["overlap_percentage"], bins, include_lowest=True
)

In [None]:
# Number of routes in each overlap bin
att_merge["binned"].value_counts()

In [None]:
# Keep only routes that have < 70 percent coverage.
# The explicit copy makes this an independent frame, so later column
# assignments on it do not trigger pandas' SettingWithCopy warning.
att_merge_low_coverage = att_merge.loc[att_merge["overlap_percentage"] < 70].copy()

In [None]:
# Row count remaining after the low-coverage filter
low_coverage_rows = len(att_merge_low_coverage)
f"{low_coverage_rows} after filtering out for rows that  < 70% coverage."

In [None]:
# First five rows when ordered by ascending overlap percentage
att_merge_low_coverage.sort_values(by="overlap_percentage", ascending=True).head(n=5)

#### To Do: create an agency-to-ITP-ID table from the above to match Cal-ITP IDs with trips_df, so there will be more merge matches.

## Trips
* How many trips are running for a route?

In [None]:
# Load the cached trips table for 2022-05-04 from GCS.
trips_parquet_path = (
    "gs://calitp-analytics-data/data-analyses/rt_delay/compiled_cached_views/trips_2022-05-04_all.parquet"
)
trips_df = pd.read_parquet(trips_parquet_path)

In [None]:
# (rows, columns) of the trips table
trips_df.shape

In [None]:
# Try to standardize route id for the 2 dfs: lower-case and strip surrounding
# whitespace so the ids compare equal across both frames.
# att_merge_low_coverage is a filtered subset of att_merge, so it is rebound
# via assign() instead of being written into in place; this avoids pandas'
# SettingWithCopy warning.
att_merge_low_coverage = att_merge_low_coverage.assign(
    route_id=att_merge_low_coverage["route_id"].str.lower().str.strip()
)
trips_df["route_id"] = trips_df["route_id"].str.lower().str.strip()

In [None]:
# Distinct low-coverage route ids, in order of first appearance
att_routes_list = att_merge_low_coverage["route_id"].drop_duplicates().tolist()

In [None]:
# Keep only trips whose route id is one of the low-coverage route ids
trips_df2 = trips_df.loc[trips_df["route_id"].isin(att_routes_list)]

In [None]:
# So many route ids disappeared: compare ids found in trips vs. ids in the overlay list
routes_in_trips = trips_df2["route_id"].nunique()
routes_in_overlay = len(att_routes_list)
f'{routes_in_trips} routes left after filtering trips_df compared to {routes_in_overlay} routes in overlay.'

In [None]:
# Route ids in the low-coverage list that have no matching trips.
id_routes = set(att_routes_list)
id_trips = set(trips_df2["route_id"].unique().tolist())
missing_routes = list(id_routes.difference(id_trips))

In [None]:
# Share of low-coverage route ids that have no matching trips.
# The ".1%" format spec multiplies the fraction by 100 and appends the percent
# sign, so the printed number is a real percentage. An empty id_routes is
# reported as 0.0% instead of raising ZeroDivisionError.
missing_share = (1 - len(id_trips) / len(id_routes)) if id_routes else 0.0
f"{missing_share:.1%} of routes are missing."

In [None]:
# List the available trip columns before choosing merge keys
trips_df2.columns

In [None]:
# Outer-join the filtered trips onto the low-coverage routes.
# Keys: route_id on both sides, plus route_long_name (trips) against
# route_name (routes). indicator=True adds a _merge column recording which
# side each row came from.
# Correct agency names later? maybe?
trips_df3 = trips_df2.merge(
    att_merge_low_coverage,
    left_on=["route_id", "route_long_name"],
    right_on=["route_id", "route_name"],
    how="outer",
    indicator=True,
)

In [None]:
# Count rows per merge outcome (pandas indicator values: left_only, right_only, both)
trips_df3["_merge"].value_counts()

In [None]:
# Number of distinct trip ids per route name / overlap percentage, using only
# rows that matched on both sides of the merge.
matched_trips = trips_df3[trips_df3["_merge"] == "both"]
(
    matched_trips.groupby(["route_name", "overlap_percentage"])
    .agg(total_trips=("trip_id", "nunique"))
)