## Prepare Provider Maps 

In [None]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import pandas as pd
import shapely.wkt
import utilities
from calitp import *
from shared_utils import geography_utils, utils

### Load in Counties/Districts

In [None]:
# Load the counties table from the project helper; later cells use its
# `county_name` and `geometry` columns.
counties = utilities.get_counties()

In [None]:
# Isolate Kern County (it plots incorrectly) so it can be repaired on its own.
kern = counties[counties["county_name"] == "Kern"].reset_index(drop=True)

In [None]:
# Display whether every Kern geometry passes the validity check.
kern.geometry.is_valid.all()

In [None]:
# Kern County raises a "non-noded intersection" linestring error.
# Workaround from https://github.com/geopandas/geopandas/issues/1724:
# serialize each geometry to WKT rounded to 4 decimal places, then parse it back.
kern["geometry"] = kern["geometry"].apply(
    lambda geom: shapely.wkt.loads(
        shapely.wkt.dumps(geom, rounding_precision=4)
    )
)

In [None]:
# Read the Caltrans district shapefile, reproject it to EPSG:4326 and keep
# only the district id and geometry columns.
# (to_snakecase comes from calitp; presumably it normalizes column names.)
districts = to_snakecase(
    gpd.read_file(f"{utilities.caltrans_shape}").to_crs(epsg=4326)
)[["district", "geometry"]]

In [None]:
# District 4 on its own, re-indexed from zero
d4 = districts[districts["district"] == 4].reset_index(drop=True)

In [None]:
# District 7 on its own, re-indexed from zero
d7 = districts[districts["district"] == 7].reset_index(drop=True)

### Load in Provider Maps

#### T-Mobile

In [None]:
# Original map: open T-Mobile's California coverage parquet from GCS
# through dask-geopandas.
tmobile = dg.read_parquet(
    "gs://calitp-analytics-data/data-analyses/cellular_coverage/tmobile_california.parquet"
)

In [None]:
# Display the type of the object dg.read_parquet returned.
type(tmobile)

In [None]:
# geojson_gcs_export(tmobile, utilities.GCS_FILE_PATH, 'tmobile_geojson')

#### Verizon 
* Clipping crashes on Kern County.
* Doesn't seem to work at all with the districts shapefile?
* Workaround: fix Kern County's geometry, clip it separately, and concat the result with the files for the other counties.
* Final result: 
   * `gs://calitp-analytics-data/data-analyses/cellular_coverage/verizon_all_counties.parquet`

In [None]:
# Original map: open the Verizon coverage parquet through dask-geopandas.
# NOTE(review): this is a relative local path, unlike the GCS paths used for
# every other read in this notebook; confirm the file is expected locally.
verizon = dg.read_parquet("Verizon.parquet")

In [None]:
# Fix Kern
# verizon_kern = utilities.find_difference_and_clip(verizon, kern)

In [None]:
# verizon_kern.plot()

### Sjoin for T-Mobile by Districts
* T-Mobile's map includes portions of other states.
* Do an sjoin against the Caltrans districts to keep only the rows of T-Mobile's gdf that overlap California, before clipping and finding areas with no coverage.

In [None]:
def district_sjoin(
    provider, gcs_file_path: str, file_name: str, districts_wanted: list
):
    """Keep the provider rows that intersect the requested Caltrans districts.

    The provider map is spatially joined (inner, ``intersects``) against each
    requested district in turn. The per-district results are concatenated
    once, computed into an in-memory frame and exported to GCS.

    Because every district is joined separately, a provider row that
    intersects several requested districts appears once per district.

    Args:
        provider: Provider coverage map exposing
            ``sjoin(gdf, how=..., predicate=...)``; the notebook passes a
            dask-geopandas GeoDataFrame.
        gcs_file_path: Destination path, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.
        file_name: Output file name, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.
        districts_wanted: Values matched against the ``district`` column of
            the Caltrans shapefile.

    Returns:
        The concatenated sjoin result after ``compute()``.

    Raises:
        ValueError: If ``districts_wanted`` is empty.
    """
    districts_wanted = list(districts_wanted)
    if not districts_wanted:
        # Fail early with a clear message instead of an AttributeError later.
        raise ValueError("districts_wanted must contain at least one district")

    # Original CT map
    ct_all_districts = to_snakecase(
        gpd.read_file(f"{utilities.caltrans_shape}").to_crs(epsg=4326)
    )[["district", "geometry"]]

    # One sjoin result per requested district; concatenated once after the loop.
    joined_parts = []
    for i in districts_wanted:
        district_gdf = ct_all_districts[ct_all_districts.district == i].reset_index(
            drop=True
        )

        # https://dask-geopandas.readthedocs.io/en/stable/docs/reference/api/dask_geopandas.GeoDataFrame.sjoin.html
        joined_parts.append(
            provider.sjoin(district_gdf, how="inner", predicate="intersects")
        )
        print(f"Done concating for {i}")

    full_gdf = dd.concat(joined_parts, axis=0)

    # Turn back to a regular gdf. A lone eager (non-dask) frame can come back
    # from concat unchanged, so only lazy results are computed.
    if hasattr(full_gdf, "compute"):
        full_gdf = full_gdf.compute()

    # Save to GCS
    utils.geoparquet_gcs_export(full_gdf, gcs_file_path, file_name)
    print("Saved to GCS")

    return full_gdf

In [None]:
# test = district_sjoin(tmobile, utilities.GCS_FILE_PATH,'tmobile_d1_d4', [1,2,3,4])

#### Testing

In [None]:
# https://dask-geopandas.readthedocs.io/en/stable/docs/reference/api/dask_geopandas.GeoDataFrame.sjoin.html
# dask_sjoin = tmobile.sjoin(d4, how='inner', predicate='intersects')

In [None]:
# https://dask-geopandas.readthedocs.io/en/stable/docs/reference/api/dask_geopandas.GeoDataFrame.sjoin.html
# dask_sjoin2 = tmobile.sjoin(d7, how='inner', predicate='intersects')

In [None]:
# Multi concat
# dask_concat =  dd.multi.concat([dask_sjoin, dask_sjoin2], axis=0)

In [None]:
# Turn into normal df
# dask_concat_gdf = dask_concat.compute()

In [None]:
# dask_concat_gdf_dissolved = dask_concat_gdf.dissolve().drop(columns = ['district'])

In [None]:
# https://dask-geopandas.readthedocs.io/en/stable/docs/reference/api/dask_geopandas.GeoDataFrame.dissolve.html
# dask_concat_dissolve = dask_concat.dissolve("district")

In [None]:
# Find length of a dask df
# https://stackoverflow.com/questions/50569171/how-do-i-find-the-length-of-a-dataframe-in-dask
# print(len(dask_concat_dissolve.index))

### Find Counties w/o Coverage

In [None]:
# Alphabetically sorted list of the distinct county names
county_names = counties["county_name"].sort_values().unique().tolist()

In [None]:
def breakout_counties(
    provider, gcs_file_path: str, file_name: str, counties_wanted: list
):
    """Run ``find_difference_and_clip`` county by county and export the result.

    Each requested county is selected from ``utilities.get_counties()`` and
    passed, together with the provider map, to
    ``utilities.find_difference_and_clip``. The per-county results are
    concatenated once, computed and exported to GCS.

    Args:
        provider: Provider coverage map, forwarded to
            ``utilities.find_difference_and_clip``.
        gcs_file_path: Destination path, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.
        file_name: Output file name, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.
        counties_wanted: Values matched against the ``county_name`` column.

    Returns:
        The concatenated per-county result after ``compute()``.

    Raises:
        ValueError: If ``counties_wanted`` is empty.
    """
    counties_wanted = list(counties_wanted)
    if not counties_wanted:
        # Fail early with a clear message instead of an AttributeError later.
        raise ValueError("counties_wanted must contain at least one county")

    counties = utilities.get_counties()

    # One clipped result per requested county; concatenated once after the loop.
    clipped_parts = []
    for i in counties_wanted:
        county_gdf = counties[counties.county_name == i].reset_index(drop=True)

        clipped_parts.append(utilities.find_difference_and_clip(provider, county_gdf))
        print(f"done concating for {i}")

    full_gdf = dd.concat(clipped_parts, axis=0)

    # Turn this into a GDF. A lone eager (non-dask) frame can come back from
    # concat unchanged, so only lazy results are computed.
    if hasattr(full_gdf, "compute"):
        full_gdf = full_gdf.compute()

    # Save to GCS
    utils.geoparquet_gcs_export(full_gdf, gcs_file_path, file_name)
    print("saved to GCS")

    return full_gdf

### Find Districts w/o Coverage

In [None]:
# Original Caltrans district map, reprojected to EPSG:4326 and trimmed to
# the district id and geometry columns.
ct_all_districts = to_snakecase(
    gpd.read_file(f"{utilities.caltrans_shape}").to_crs(epsg=4326)
)[["district", "geometry"]]
    

In [None]:
# Filter for only districts wanted. Drop the old index (as every other filter
# in this notebook does) so no stray "index" column is carried along.
ct_districts_filtered = ct_all_districts[
    ct_all_districts["district"].isin([1, 2, 3, 4])
].reset_index(drop=True)

In [None]:
# Display the type of the filtered districts frame.
type(ct_districts_filtered)

In [None]:
# Visual check of the filtered districts.
ct_districts_filtered.plot()

In [None]:
# Open the previously exported tmobile_d1_d4 parquet from GCS through
# dask-geopandas (the commented-out district_sjoin call above uses the same
# file name).
tmobile_d1d4_dd = dg.read_parquet(
    "gs://calitp-analytics-data/data-analyses/cellular_coverage/tmobile_d1_d4.parquet"
)

In [None]:
# Materialize the dask-geopandas frame into memory.
tmobile_d1d4_dd_gdf = tmobile_d1d4_dd.compute()

In [None]:
# Visual check of the computed T-Mobile frame.
tmobile_d1d4_dd_gdf.plot()

In [None]:
# Trial run: pass the un-computed T-Mobile frame and the filtered districts
# to the project's find_difference_and_clip helper.
test = utilities.find_difference_and_clip(tmobile_d1d4_dd, ct_districts_filtered)

In [None]:
def breakout_districts(
    sjoin_provider_map: dg.GeoDataFrame,
    districts_wanted: list,
    gcs_file_path: str,
    file_name: str,
):
    """Run ``find_difference_and_clip`` against a set of Caltrans districts.

    Loads the Caltrans district shapefile, keeps the rows whose ``district``
    is in ``districts_wanted``, hands those (with the provider map) to
    ``utilities.find_difference_and_clip`` and exports the outcome to GCS.

    Args:
        sjoin_provider_map: Provider map, forwarded to
            ``utilities.find_difference_and_clip``.
        districts_wanted: Values matched against the ``district`` column.
        gcs_file_path: Destination path, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.
        file_name: Output file name, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.

    Returns:
        Whatever ``utilities.find_difference_and_clip`` returned.
    """
    # Caltrans district map in EPSG:4326, trimmed to the two columns needed
    shape_path = f"{utilities.caltrans_shape}"
    ct_map = to_snakecase(gpd.read_file(shape_path).to_crs(epsg=4326))
    ct_map = ct_map[["district", "geometry"]]

    # Rows belonging to the requested districts, re-indexed from zero
    is_wanted = ct_map["district"].isin(districts_wanted)
    selected_districts = ct_map[is_wanted].reset_index(drop=True)

    clipped = utilities.find_difference_and_clip(sjoin_provider_map, selected_districts)
    print("Done finding difference")

    utils.geoparquet_gcs_export(clipped, gcs_file_path, file_name)
    print("saved to GCS")

    return clipped

In [None]:
# Trial run of breakout_districts; note that it also writes to GCS.
# NOTE(review): the output name says "d1_d4" but only districts 1 and 2 are
# requested here; confirm this is intentional before re-running.
test = breakout_districts(tmobile_d1d4_dd, [1,2,], utilities.GCS_FILE_PATH, "tmobile_no_coverage_d1_d4")

### Concat all separated out areas using dask

In [None]:
# California is separated out into different gdfs that contain
# portions of districts/counties. Concat them all together
# to get the entirety of California again.
def concat_all_areas(all_gdf: list, gcs_file_path: str, file_name: str):
    """Concatenate separately processed areas into one frame and export it.

    Args:
        all_gdf: List of frames (one per group of districts/counties) to
            stack row-wise. Must be a list; it is handed straight to
            ``dd.concat``.
        gcs_file_path: Destination path, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.
        file_name: Output file name, forwarded unchanged to
            ``utils.geoparquet_gcs_export``.

    Returns:
        The concatenated frame after ``compute()``.
    """
    # Concat all the areas that were broken out into one, then turn it into a gdf
    full_gdf = dd.concat(all_gdf, axis=0).compute()

    # Export
    utils.geoparquet_gcs_export(full_gdf, gcs_file_path, file_name)

    print("Saved to GCS")
    return full_gdf

In [None]:
# tmobile_all_CA = concat_all_areas(tmobile, utilities.GCS_FILE_PATH, "tmobile_overlap_with_CA")

In [None]:
# Read the exported Verizon result for every county except Kern into memory
# (plain geopandas, not dask).
verizon_wo_kern = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/cellular_coverage/verizon_all_counties_except_Kern.parquet"
)

In [None]:
# verizon_wo_kern.plot()

In [None]:
# Read the final Verizon result (all counties) into memory; this is the file
# the Verizon notes above list as the final result.
verizon_all = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/cellular_coverage/verizon_all_counties.parquet"
)

In [None]:
# verizon_all.plot()