### Analysis V2


In [None]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely.wkt
import A1_provider_prep
import A2_analysis
from calitp.sql import to_snakecase
from shared_utils import geography_utils, utils

In [None]:
# Notebook-wide pandas display settings: show up to 100 columns, format
# floats with two decimals, and never truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

### Load Provider Maps

In [None]:
# Simple version: the simplified provider gdfs for Verizon, AT&T and T-Mobile,
# unpacked in the order A1_provider_prep.simplify_geometry_all_providers()
# returns them (presumably geometry-simplified coverage maps -- confirm in
# A1_provider_prep).
verizon_simple, att_simple, tmobile_simple = A1_provider_prep.simplify_geometry_all_providers()

In [None]:
# All geometries are valid in each gdf (`.geometry.is_valid.all()` returns True for all three)
# verizon_simple.geometry.is_valid.all(), att_simple.geometry.is_valid.all(), tmobile_simple.geometry.is_valid.all(),

In [None]:
# Original provider maps for AT&T and T-Mobile (presumably the unsimplified
# counterparts of the *_simple gdfs -- confirm in A1_provider_prep).
# NOTE(review): no verizon_og is loaded here, although later cells mention it.
att_og = A1_provider_prep.load_att()
tmobile_og = A1_provider_prep.load_tmobile()

In [None]:
# All geometries are valid in each gdf (`.geometry.is_valid.all()` returns True for each)
# verizon_og.geometry.is_valid.all(), att_og.geometry.is_valid.all(), tmobile_og.geometry.is_valid.all(),

In [None]:
# Routes that get compared against the provider maps below
# (presumably one row per unique route -- confirm in A2_analysis).
unique_routes = A2_analysis.load_unique_routes_df()

In [None]:
# Check that the routes and the three simplified provider gdfs share one CRS.
# NOTE: this only evaluates to True/False as the cell output; it does not
# raise or stop the notebook when the CRS differ.
unique_routes.crs == att_simple.crs == tmobile_simple.crs == verizon_simple.crs

In [None]:
def split_routes(unique_routes=None, n_splits=4):
    """Split the unique routes into consecutive, near-equal row chunks.

    Args:
        unique_routes: (Geo)DataFrame to split. When None (the default, which
            is the original behavior) the routes are loaded with
            A2_analysis.load_unique_routes_df().
        n_splits: Number of chunks to return. Defaults to 4.

    Returns:
        A tuple of ``n_splits`` frames that together hold every row in the
        original order. When the row count is not evenly divisible, the
        leading chunks receive one extra row each (the same chunk sizes
        np.array_split produces).

    Raises:
        ValueError: If ``n_splits`` is smaller than 1.
    """
    if n_splits < 1:
        raise ValueError("n_splits must be a positive integer")

    if unique_routes is None:
        unique_routes = A2_analysis.load_unique_routes_df()

    # https://stackoverflow.com/questions/17315737/split-a-large-pandas-dataframe
    # Slice by row position instead of handing the frame to np.array_split:
    # on a DataFrame that call goes through DataFrame.swapaxes, which pandas
    # has deprecated. Positional slicing also keeps the frame's own type.
    base_size, remainder = divmod(len(unique_routes), n_splits)

    chunks = []
    start = 0
    for chunk_number in range(n_splits):
        # The first `remainder` chunks absorb the leftover rows, one each.
        stop = start + base_size + (1 if chunk_number < remainder else 0)
        chunks.append(unique_routes.iloc[start:stop])
        start = stop

    return tuple(chunks)

In [None]:
# Four row-wise chunks of the unique routes, so a comparison can be run on
# one chunk at a time instead of on every route at once.
r1,r2,r3,r4 = split_routes()

### Notes
* `verizon_simple` does not work with `comparison`. Around 500 routes are passed when using `overlay_single_routes`.
* `verizon_og` does not work either. It returns the error: `TopologyException: side location conflict at -122.30299999995003 37.938999999750251. This can occur if the input geometry is invalid.`
* Both gdfs return `True` when `.geometry.is_valid.all()` is applied to them.

In [None]:
# TopologyException: side location conflict at -122.30299999995003 37.938999999750251. This can occur if the input geometry is invalid.
# verizon_o = utilities.comparison(verizon_og, unique_routes, "verizon_simplified")

In [None]:
# verizon_o = utilities.comparison(verizon_og, unique_routes, "verizon")

### Function Ideas
* Other Function should:
    * Drop Duplicates
    * Sum up new route length by long route name
    * Find max of original route length
    * Divide new route length by original to get percentage covered. 
    * Del irrelevant columns
    * Does it have to be a gdf at this point or can it just be a normal dataframe?
* If a route has a low percentage of intersection with a provider map, is that a good sign?
* The maps only contain areas *without* coverage. So if a route only intersects a little, does that mean the route mostly has coverage?

In [None]:
# Compare the original AT&T map against the first route chunk only (r1).
# "att_r1" is presumably the label/file name for the result -- confirm in
# A2_analysis.comparison.
att_overlay = A2_analysis.comparison(att_og, r1, "att_r1")

In [None]:
# Read a previously saved parquet from A1_provider_prep.GCS_FILE_PATH; going
# by the file name, it is the overlay of the simplified T-Mobile map with the
# unique routes.
tmobile_overlay = gpd.read_parquet(
    f"{A1_provider_prep.GCS_FILE_PATH}tmobile_simplified_overlaid_with_unique_routes.parquet"
)

In [None]:
# Aggregate the T-Mobile overlay with A2_analysis.dissolve_summarize
# (presumably a dissolve plus per-route summary -- confirm in A2_analysis).
tmobile_agg = A2_analysis.dissolve_summarize(tmobile_overlay)

In [None]:
# att_agg[att_agg.agency == 'AC Transit'][['agency','long_route_name','percentage_route_covered']]

In [None]:
# Row count vs. number of distinct long_route_name values; the two numbers
# match only when every row has its own route name.
len(tmobile_agg), tmobile_agg.long_route_name.nunique()

In [None]:
# Keep only the AC Transit rows of the T-Mobile summary for a closer look.
ac_transit_only = tmobile_agg[tmobile_agg.agency == 'AC Transit']

In [None]:
# Disabled interactive map of the AC Transit rows: the call is wrapped in a
# string literal, so nothing is rendered when this cell runs.
# NOTE(review): before re-enabling, confirm the colour argument -- "tab10c"
# does not look like a matplotlib colormap name (tab10 and tab20c exist), and
# explore() presumably expects colormaps via `cmap` rather than `color`.
"""
ac_transit_only.explore('long_route_name',
               width=800,
    height=400,
    style_kwds={"weight": 6},
    legend=False,
    color="tab10c",)
"""