# Analysis
* Create yml for all my files. 
* Save overlay? 

In [1]:
# Read in zip files
# Graphs
import altair as alt
import fsspec
import geopandas as gpd
import intake
import numpy as np
import pandas as pd

# My utilities
import utilities
from calitp import *

# Display
from IPython.display import HTML, Image, Markdown, display, display_html

# Geometry
from shared_utils import geography_utils, utils



In [2]:
# Notebook-wide pandas display settings: up to 100 columns, every row,
# untruncated cell text, and floats formatted with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
# Base Google Cloud Storage folder for this analysis. File names are appended
# directly to it, so the trailing slash is required.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/cellular_coverage/"

In [4]:
# Load the transit routes through the project helper; every carrier comparison
# below is run against this same frame.
routes_df = utilities.load_unique_routes_df()

In [5]:
routes_df.shape

(2829, 7)

## T-Mobile

In [6]:
# Read the T-Mobile California coverage layer straight from the GCS folder.
tmobile_df = gpd.read_parquet(f"{GCS_FILE_PATH}tmobile_california.parquet")

In [7]:
# Per-route T-Mobile coverage from the project helper. The "_tmobile" suffix
# appears on the result columns used later (percentage_tmobile, binned_tmobile).
tmobile_m1 = utilities.route_cell_coverage(tmobile_df, routes_df, "_tmobile")

In [8]:
type(tmobile_m1)

geopandas.geodataframe.GeoDataFrame

In [9]:
# Scratch check: run the comparison helper on its own so the dissolve, merge and
# percentage steps can be rebuilt by hand in the cells below.
# NOTE(review): presumably this is the overlay step used inside
# route_cell_coverage — confirm against utilities.py.
overlay_test = utilities.comparison(tmobile_df, routes_df)

In [10]:
type(tmobile_df)

geopandas.geodataframe.GeoDataFrame

In [11]:
type(overlay_test)

geopandas.geodataframe.GeoDataFrame

In [12]:
# Collapse the overlay pieces back to one row per route, summing route_length.
overlay_test2 = overlay_test.dissolve(
    by=["route_id", "route_name", "agency", "itp_id"],
    aggfunc={"route_length": "sum"},
).reset_index()

In [13]:
type(overlay_test2)

geopandas.geodataframe.GeoDataFrame

In [14]:
type(routes_df)

geopandas.geodataframe.GeoDataFrame

In [15]:
# Pair each dissolved overlay row with its original route. Columns present on
# both sides get the "_overlay" / "_original_df" suffixes.
m_test = overlay_test2.merge(
    routes_df,
    how="inner",
    on=["agency", "route_id", "route_name", "itp_id"],
    suffixes=["_overlay", "_original_df"],
)
    

In [16]:
m_test.columns

Index(['route_id', 'route_name', 'agency', 'itp_id', 'geometry_overlay',
       'route_length_overlay', 'geometry_original_df', 'route_type',
       'route_length_original_df'],
      dtype='object')

In [17]:
# After the merge there are two suffixed geometry columns, so explicitly pick the
# overlay geometry as the active one and declare its CRS.
m_test = gpd.GeoDataFrame(m_test, geometry = "geometry_overlay", crs = "EPSG:4326") 

In [18]:
# Overlay length expressed as a percentage of the original route length.
m_test["percentage"] = (
    m_test["route_length_overlay"]
    .div(m_test["route_length_original_df"])
    .mul(100)
)

In [19]:
type(m_test)

geopandas.geodataframe.GeoDataFrame

In [20]:
type(m_test)

geopandas.geodataframe.GeoDataFrame

In [21]:
tmobile_m1.shape

(2828, 11)

In [22]:
type(tmobile_m1)

geopandas.geodataframe.GeoDataFrame

In [23]:
tmobile_m1["binned_tmobile"].value_counts()

(90, 100]    2030
(80, 90]      173
(70, 80]       87
(60, 70]       37
(50, 60]       21
(40, 50]       11
(30, 40]        8
(20, 30]        4
(10, 20]        1
(0, 10]         0
Name: binned_tmobile, dtype: int64

## AT&T

In [24]:
# Load the AT&T coverage layer through the project helper.
att_df = utilities.load_att()

In [25]:
# Same per-route coverage computation as for T-Mobile; result columns carry the
# "_att" suffix (percentage_att, binned_att).
att_m1 = utilities.route_cell_coverage(att_df, routes_df, "_att")

In [26]:
type(att_m1)

geopandas.geodataframe.GeoDataFrame

In [27]:
att_df.shape, att_m1.shape

((36, 1), (2822, 11))

In [28]:
att_m1["binned_att"].value_counts()

(90, 100]    2069
(80, 90]      139
(70, 80]       75
(60, 70]       44
(50, 60]       38
(10, 20]        4
(20, 30]        2
(30, 40]        2
(40, 50]        2
(0, 10]         0
Name: binned_att, dtype: int64

## Verizon

In [29]:
# Load the Verizon coverage layer through the project helper.
verizon_df = utilities.load_verizon()

In [30]:
# Same per-route coverage computation again; result columns carry the
# "_verizon" suffix (percentage_verizon, binned_verizon).
verizon_m1 = utilities.route_cell_coverage(verizon_df, routes_df, "_verizon")

In [31]:
verizon_df.shape, verizon_m1.shape

((74, 1), (2822, 11))

In [32]:
type(verizon_m1)

geopandas.geodataframe.GeoDataFrame

In [33]:
verizon_m1["binned_verizon"].value_counts()

(90, 100]    2414
(80, 90]      168
(70, 80]       74
(60, 70]       38
(50, 60]       29
(40, 50]        3
(10, 20]        2
(30, 40]        2
(0, 10]         0
(20, 30]        0
Name: binned_verizon, dtype: int64

## Compare routes across providers.
* https://geopandas.org/en/stable/docs/user_guide/mergingdata.html
* Make sure CRS are the same.

In [191]:
# Threshold
theshold = 70

In [192]:
# Filter out for threshold
low_att_coverage = att_m1.loc[att_m1["percentage_att"] < theshold].reset_index(drop=True)
low_verizon_coverage = verizon_m1.loc[
    verizon_m1["percentage_verizon"] < theshold
].reset_index(drop=True)
low_tmobile_coverage = tmobile_m1.loc[
    tmobile_m1["percentage_tmobile"] < theshold
].reset_index(drop=True)

In [193]:
low_att_coverage.shape, low_verizon_coverage.shape, low_tmobile_coverage.shape,

((92, 11), (74, 11), (82, 11))

In [194]:
type(low_att_coverage), type(low_verizon_coverage), type(low_tmobile_coverage)

(geopandas.geodataframe.GeoDataFrame,
 geopandas.geodataframe.GeoDataFrame,
 geopandas.geodataframe.GeoDataFrame)

### Merge Verizon & ATT

In [195]:
# Outer-join the low-coverage AT&T and Verizon routes. The "_merge" indicator
# records whether a route is low on both carriers or on only one of them.
m1 = low_att_coverage.merge(
    low_verizon_coverage,
    how="outer",
    left_on=["route_id_att", "route_name_att", "agency_att", "itp_id_att"],
    right_on=["route_id_verizon", "route_name_verizon", "agency_verizon", "itp_id_verizon"],
    indicator=True,
)

In [196]:
m1['_merge'].value_counts()

both          64
left_only     28
right_only    10
Name: _merge, dtype: int64

In [197]:
# Turn the merged frame into a GeoDataFrame, using the Verizon overlay geometry
# as the active geometry column and declaring its CRS.
m1 = gpd.GeoDataFrame(m1, geometry = 'geometry_overlay_verizon', crs = "EPSG:4326") 

In [198]:
type(m1)

geopandas.geodataframe.GeoDataFrame

In [199]:
m1.loc[m1["_merge"] == "both"][
    [  "route_name_att",
        "route_name_verizon",
        "percentage_verizon",
        "percentage_att"

    ]
].drop_duplicates(subset = ["route_name_att"])

Unnamed: 0,route_name_att,route_name_verizon,percentage_verizon,percentage_att
0,"Guerneville, Monte Rio","Guerneville, Monte Rio",50.68,13.43
1,Sonora HWY 120,Sonora HWY 120,60.59,54.42
4,Planada Commuter,Planada Commuter,67.6,67.65
5,,,51.67,57.87
16,Avocado Heights/Bassett/West Valinda Shuttle,Avocado Heights/Bassett/West Valinda Shuttle,66.59,67.46
25,Edmund D. Edelman Children’s Court Shuttle,Edmund D. Edelman Children’s Court Shuttle,53.87,53.88
34,"The Gold Route is operated by Arcata and Mad River Transit System and serves downtown Arcata, Humboldt State University, Valley West Shopping Center, and Alliance Rd.","The Gold Route is operated by Arcata and Mad River Transit System and serves downtown Arcata, Humboldt State University, Valley West Shopping Center, and Alliance Rd.",69.17,69.17
38,South Main & Walnut,South Main & Walnut,66.44,69.53
42,Grass Valley to North San Juan,Grass Valley to North San Juan,65.85,65.5
43,Route between Weaverville and Hayfork. Connects to Redding Line in Douglas City.,Route between Weaverville and Hayfork. Connects to Redding Line in Douglas City.,52.58,22.69


#### Check a left only value for Verizon
* Left-only routes (low coverage on AT&T only) have 70% or more coverage through Verizon.

In [200]:
verizon_m1.loc[verizon_m1["route_name_verizon"] == "Ridgecrest Shuttle"][
    [
        "route_name_verizon",
        "percentage_verizon",
    ]
]

Unnamed: 0,route_name_verizon,percentage_verizon
246,Ridgecrest Shuttle,99.99


In [201]:
verizon_m1.loc[verizon_m1["route_name_verizon"] == "Mainline AM/PM"][
    [
        "route_name_verizon",
        "percentage_verizon",
    ]
]

Unnamed: 0,route_name_verizon,percentage_verizon
2719,Mainline AM/PM,72.45
2720,Mainline AM/PM,72.45


In [202]:
# Keep only the "both" rows: routes that fall below the coverage threshold on
# AT&T and on Verizon.
m1 = m1.loc[m1["_merge"] == "both"]

In [203]:
# Drop the indicator column so the next merge can add its own "_merge" column.
m1 = m1.drop(columns=["_merge"])

### Merge m1 with T-Mobile

In [204]:
# Merge the routes that are low on both AT&T and Verizon (m1) with the
# low-coverage T-Mobile routes; the indicator shows which side each row came from.
m2 = pd.merge(
    m1,
    low_tmobile_coverage,
    how="outer",
    left_on=["route_id_att", "route_name_att", "agency_att", "itp_id_att"],
    right_on=[
        "route_id_tmobile",
        "route_name_tmobile",
        "agency_tmobile",
        "itp_id_tmobile",
    ],
    indicator=True,
)

In [205]:
# Turn the three-carrier merge result (m2) into a GeoDataFrame. m1 was already
# converted before the merge, so m2 is the frame that needs its active geometry
# column and CRS set here.
m2 = gpd.GeoDataFrame(m2, geometry="geometry_overlay_verizon", crs="EPSG:4326")

In [206]:
m2["_merge"].value_counts()

both          55
right_only    27
left_only      9
Name: _merge, dtype: int64

In [207]:
# Keep only the "both" rows: routes that fall below the coverage threshold on
# all three carriers.
m3 = m2.loc[m2["_merge"] == "both"]

In [208]:
verizon_m1.loc[verizon_m1["route_name_verizon"] == "The Coaster"][
    [
        "route_name_verizon",
        "percentage_verizon",
    ]
]

Unnamed: 0,route_name_verizon,percentage_verizon
1038,The Coaster,86.87


In [209]:
# Drop the indicator column so the trips merge can add its own "_merge" column.
m3 = m3.drop(columns=["_merge"])

In [210]:
type(m3)

geopandas.geodataframe.GeoDataFrame

# Add trips

In [211]:
# Load the cleaned trips table through the project helper; it is joined to the
# low-coverage routes on route_id and calitp_itp_id in the next cell.
trips_df = utilities.load_clean_trips_df()

In [212]:
# Attach trip data to the routes that are low on all three carriers. The left
# join keeps routes with no matching trips; the indicator shows which matched.
m4 = pd.merge(
    m3,
    trips_df,
    how="left",
    left_on=["route_id_att", "itp_id_att"],
    right_on=["route_id", "calitp_itp_id"],
    indicator=True,
)

In [213]:
m4["_merge"].value_counts()

both          46
left_only      9
right_only     0
Name: _merge, dtype: int64

In [214]:
m4.route_id.nunique()

24

In [215]:
# Drop the indicator column so the NTD merge can add its own "_merge" column.
m4 = m4.drop(columns=["_merge"])

In [226]:
type(m4)

geopandas.geodataframe.GeoDataFrame

# Add NTD
* How to incorporate total trips w/ total buses?

In [217]:
# m4["agency_att"].sort_values().unique()

In [218]:
# Load NTD vehicle data through the project helper; it is joined to the routes
# by agency name below.
# NOTE(review): presumably the source of the total_buses column — confirm.
ntd_df = utilities.ntd_vehicles()

In [219]:
# ntd_df["agency"].sort_values().unique().tolist()

In [220]:
# Rename NTD agencies to the names used in m4["agency_att"] so the agency-name
# merge below can match them.
ntd_df["agency"] = ntd_df["agency"].replace(
    {
        "Trinity County": "Trinity Transit",
        "City of Calabasas": "Calabasas Transit System",
        "County of Sonoma": "Sonoma County Transit",
        "Tehama County": "Tehama Rural Area eXpress",
        "Los Angeles County Department of Public Works - East L.A.": "East Los Angeles Shuttle",
        # Maps to itself (no-op); kept so the mapping is unchanged.
        "Sacramento Regional Transit District": "Sacramento Regional Transit District",
        "Eastern Sierra Transit Authority": "Mammoth Lakes Transit System",
        "City of Lompoc": "City of Lompoc Transit",
        "San Luis Obispo Regional Transit Authority": "South County Transit Link",
        "City of Roseville": "Roseville Transit",
        "Los Angeles County Dept. of Public Works - Athens Shuttle Service": "the Link-Athens",
        "Los Angeles County Department of Public Works - Avocado Heights": "Avocado Heights/Bassett/West Valinda Shuttle",
        "Susanville Indian Rancheria": "Susanville Indian Rancheria Public Transportation Program",
    }
)

In [221]:
# Replace agency names in m4 to match NTD: Cloverdale Transit is relabelled as
# Sonoma County Transit before the agency-name merge.
m4['agency_att'] = m4['agency_att'].replace(
    {
        "Cloverdale Transit":"Sonoma County Transit",
    }
)

In [222]:
# Attach NTD data by agency name. The left join keeps routes whose agency has no
# NTD match; the indicator shows how many matched.
m5 = pd.merge(
    m4,
    ntd_df,
    how="left",
    left_on="agency_att",
    right_on="agency",
    indicator=True,
)

In [223]:
m5["_merge"].value_counts()

both          30
left_only     25
right_only     0
Name: _merge, dtype: int64

In [224]:
# Columns shown in the final summary table.
subset_cols = [
    "route_id_att",
    "route_name_att",
    "agency_att",
    "itp_id_att",
    "percentage_att",
    "percentage_verizon",
    "percentage_tmobile",
    "total_buses",
    "total_trips",
]

# Final

In [225]:
# Keep the first row for each route_id and show only the summary columns.
# NOTE(review): the dedup key is route_id alone, while the trips merge above keys
# on route_id + itp_id — confirm route ids are unique across agencies.
m5.drop_duplicates(subset=["route_id_att"]).reset_index(drop=True)[subset_cols]

Unnamed: 0,route_id_att,route_name_att,agency_att,itp_id_att,percentage_att,percentage_verizon,percentage_tmobile,total_buses,total_trips
0,1042,"Guerneville, Monte Rio",Sonoma County Transit,70.0,13.43,50.68,45.53,77.0,8.0
1,1094,Sonora HWY 120,Yosemite Area Regional Transportation System,374.0,54.42,60.59,53.88,10.0,
2,1210,Planada Commuter,Merced The Bus,343.0,67.65,67.6,65.18,,6.0
3,1292,,Susanville Indian Rancheria Public Transportation Program,329.0,57.87,51.67,30.49,4.0,2.0
4,13050,Avocado Heights/Bassett/West Valinda Shuttle,Avocado Heights/Bassett/West Valinda Shuttle,171.0,67.46,66.59,67.46,4.0,23.0
5,13054,Edmund D. Edelman Children’s Court Shuttle,Avocado Heights/Bassett/West Valinda Shuttle,171.0,53.88,53.87,53.88,4.0,49.0
6,14,"The Gold Route is operated by Arcata and Mad River Transit System and serves downtown Arcata, Humboldt State University, Valley West Shopping Center, and Alliance Rd.",Arcata and Mad River Transit System,18.0,69.17,69.17,63.89,,6.0
7,144,South Main & Walnut,Susanville Indian Rancheria Public Transportation Program,329.0,69.53,66.44,69.53,4.0,11.0
8,16672,Grass Valley to North San Juan,Gold Country Stage,221.0,65.5,65.85,67.56,,10.0
9,178,Route between Weaverville and Hayfork. Connects to Redding Line in Douglas City.,Trinity Transit,344.0,22.69,52.58,38.99,7.0,8.0


In [None]:
m5.columns

In [None]:
# NOTE(review): no "geometry100" column appears in any output above — confirm
# the column name before running. The result is not assigned, so this cell
# presumably only displays the frame and leaves m5 unchanged.
m5.set_geometry('geometry100')