### Analysis V2


In [1]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely.wkt
import A1_provider_prep
import A2_analysis
import A3_other
from calitp.sql import to_snakecase
from shared_utils import geography_utils, utils



In [2]:
# Notebook-wide pandas display settings: show up to 100 columns, render floats
# with two decimals, and never truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
# Load one coverage map per carrier through the project's A1_provider_prep
# loaders. The "_og" suffix presumably marks these as the original
# (unsimplified) versions -- confirm against A1_provider_prep.
att_og = A1_provider_prep.load_att()
tmobile_og = A1_provider_prep.load_tmobile()
verizon_og = A1_provider_prep.load_verizon()

In [None]:
# Simplify provider maps
# https://stackoverflow.com/questions/69103074/gdf-simplify-messes-up-geometries
def simplify_geometry(
    provider: gpd.GeoDataFrame, tolerance: float = 0.1
) -> gpd.GeoDataFrame:
    """Return ``provider`` with its geometry column simplified.

    The geometry is reprojected to ``geography_utils.CA_StatePlane``,
    simplified, passed through a zero-width buffer, and reprojected to
    ``geography_utils.WGS84``. All other columns are carried over unchanged;
    ``assign`` returns a new frame, so the input is not modified.

    Args:
        provider: Provider coverage map to simplify.
        tolerance: Value passed to ``GeoSeries.simplify``, expressed in the
            units of the ``CA_StatePlane`` CRS. Defaults to ``0.1``, the
            value this notebook previously hard-coded.

    Returns:
        A new GeoDataFrame whose ``geometry`` column holds the simplified
        shapes in ``geography_utils.WGS84``.
    """
    simplified = (
        provider.geometry.to_crs(geography_utils.CA_StatePlane)
        .simplify(tolerance)
        # Zero-width buffer: presumably rebuilds geometries that simplify()
        # leaves invalid, per the StackOverflow thread
        # https://stackoverflow.com/questions/69103074/gdf-simplify-messes-up-geometries
        .buffer(0)
        .to_crs(geography_utils.WGS84)
    )

    return provider.assign(geometry=simplified)

### Notes
* `verizon_simple` does not work with `comparison`. Around 500 routes are passed when using `overlay_single_routes`.
* `verizon_og` does not work either. It returns the error: `TopologyException: side location conflict at -122.30299999995003 37.938999999750251. This can occur if the input geometry is invalid.`
* Both GeoDataFrames return `True` when `.geometry.is_valid.all()` is applied to them.

In [None]:
# The commented-out call below fails with:
# TopologyException: side location conflict at -122.30299999995003 37.938999999750251. This can occur if the input geometry is invalid.
# verizon_o = A2_analysis.comparison(verizon_og, r1)

### Function Ideas
* Other Function should:
    * Drop Duplicates
    * Sum up new route length by long route name
    * Find max of original route length
    * Divide new route length by original to get percentage covered. 
    * Del irrelevant columns
    * Does it have to be a gdf at this point or can it just be a normal dataframe?
* If a route has a low percentage of intersection with a provider map, is that a good sign?
* The maps only contain areas *without* coverage. So if a route only intersects a map a little, does that mean the route mostly has coverage?

In [None]:
# Run the project's A2_analysis.comparison helper on the routes and the
# T-Mobile map loaded above.
# NOTE(review): `unique_routes` is not assigned in any cell visible in this
# notebook -- confirm it is created before this cell runs.
# NOTE(review): the commented-out Verizon call passes the provider first
# (`comparison(verizon_og, r1)`), the opposite order from this call -- confirm
# which argument order `comparison` expects.
tmobile_overlay = A2_analysis.comparison(unique_routes, tmobile_og )

In [None]:
# Replace the raw overlay with the output of A2_analysis.dissolve_summarize.
# The name is rebound to the result, so re-running this cell on its own feeds
# the already-summarized frame back into the helper.
tmobile_overlay = A2_analysis.dissolve_summarize(tmobile_overlay)

In [None]:
# Row count next to the number of distinct route names in the overlay.
len(tmobile_overlay), tmobile_overlay["long_route_name"].nunique()

In [None]:
# Keep only the T-Mobile overlay rows whose agency is AC Transit.
ac_transit_only_tmobile = tmobile_overlay.loc[
    tmobile_overlay["agency"] == "AC Transit"
]

In [None]:
# Keep only the AT&T overlay rows whose agency is AC Transit.
# NOTE(review): `att_district_simplified_overlay` is not assigned in any cell
# visible in this notebook -- confirm it is created before this cell runs.
ac_transit_only_att = att_district_simplified_overlay.loc[
    att_district_simplified_overlay["agency"] == "AC Transit"
]

In [None]:
# Show each AC Transit route with its percentage_route_covered value,
# sorted ascending by that column.
(
    ac_transit_only_tmobile[["long_route_name", "percentage_route_covered"]]
    .sort_values(by="percentage_route_covered")
)