# Map Dev for Reports

In [None]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
from calitp import to_snakecase
from dla_utils import _dla_utils
from IPython.display import HTML, Markdown
from siuba import *
from shared_utils import geography_utils

import altair as alt

import _data_cleaning
import _report_utils

In [None]:
# GCS folder for the DLA ATP analysis data.
# NOTE(review): not referenced by any other cell visible in this notebook.
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/atp/'


In [None]:
# Load the joined dataset through the project helper; its schema is defined
# in _report_utils, not in this notebook.
df = _report_utils.read_in_joined_data()

In [None]:
# Show up to 500 columns when a DataFrame is displayed.
pd.options.display.max_columns = 500

In [None]:
# Columns passed to _report_utils.fix_geom_issues when building the map data.
mapsubset_cols = [
    # application status / identifiers
    'awarded',
    'project_app_id',
    'project_cycle',
    'data_origin',
    'geometry',
    # implementing agency
    'a1_imp_agcy_city',
    'a1_imp_agcy_name',
    'a1_proj_partner_agcy',
    # legislative districts
    'assembly_district',
    'congressional_district',
    'senate_district',
    # project location and description
    'a2_county',
    'a2_ct_dist',
    'a2_info_proj_name',
    'a3_proj_type',
    'total_atp_$',
    'a2_proj_lat',
    'a2_proj_long',
]

In [None]:
# Build the mapping dataset from df and the mapsubset_cols list.
# NOTE(review): presumably returns a GeoDataFrame; later cells call
# .explore() on it and drop an 'index_right' column from it. Confirm against
# _report_utils.fix_geom_issues.
df_map = _report_utils.fix_geom_issues(df, mapsubset_cols)

In [None]:
#df_map.explore("data_origin", cmap="tab20b")

In [None]:
## District boundary shapes (joined later on their DISTRICT column).
## URL taken from Amanda's project-prioritization notebook:
## https://github.com/cal-itp/data-analyses/blob/main/project_prioritization/_utils.py
shapes = "https://gis.data.ca.gov/datasets/0144574f750f4ccc88749004aca6eb0c_0.geojson?outSR=%7B%22latestWkid%22%3A3857%2C%22wkid%22%3A102100%7D"
# Read the GeoJSON and reproject to EPSG:4326.
district_shapes = gpd.read_file(shapes).to_crs(epsg=4326)

In [None]:
#district_shapes.boundary.explore()

In [None]:
## County geography: read the GeoJSON and reproject to EPSG:4326.
shapes2 = "https://opendata.arcgis.com/datasets/8713ced9b78a4abb97dc130a691a8695_0.geojson"
county_shapes = gpd.read_file(shapes2).to_crs(epsg=4326)

In [None]:
#county_shapes.boundary.explore()

In [None]:
# Remove the 'index_right' helper column (presumably left over from a spatial
# join inside fix_geom_issues -- TODO confirm).
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column is already gone.
df_map = df_map.drop(columns='index_right', errors='ignore')

## Joining District and Points

In [None]:
def nunique_by_geography(df, geodf, geog_col, agg_col, geodf_mergeon_col):
    """
    Count distinct `agg_col` values per `geog_col` group and attach the
    counts to the rows of `geodf`.

    Parameters
    ----------
    df : table holding `geog_col` and `agg_col`.
    geodf : frame to merge the counts onto (the left side of the merge).
    geog_col : name of the column in `df` to group by.
    agg_col : name of the column in `df` whose unique values are counted.
    geodf_mergeon_col : name of the column in `geodf` matched to `geog_col`.

    Returns
    -------
    The result of `geodf.merge(...)` with an added `n_unique` column.
    """
    # One row per geog_col value with the number of distinct agg_col values.
    per_group = df >> group_by(_[geog_col])
    counts = per_group >> summarize(n_unique=_[agg_col].nunique())

    # Inner merge: geodf rows without a matching group are dropped, and so
    # are groups without a matching geodf row.
    merged = geodf.merge(
        counts,
        left_on=[geodf_mergeon_col],
        right_on=[geog_col],
        how='inner',
    )
    return merged
              

In [None]:
# Unique project_app_id count per a2_ct_dist value, merged onto the district
# shapes via their DISTRICT column.
by_dist = nunique_by_geography(
    df=df_map,
    geodf=district_shapes,
    geog_col='a2_ct_dist',
    agg_col='project_app_id',
    geodf_mergeon_col='DISTRICT',
)

In [None]:
#by_dist.explore('n_applications', cmap='Oranges', highlight=True, legend=True)

## Layering

In [None]:
#base = district_shapes.plot(color='white', edgecolor='black')

#df_map.plot(ax=base, marker='o', color='blue', markersize=5)

In [None]:
# base = district_shapes.plot(figsize=(8,8), color='white', edgecolor='black')
# layered = df_map.plot(ax=base, marker='o', legend='true', column='data_origin', markersize=10)
# layered.set_title('ATP Applications',fontsize=18)

# layered

In [None]:
import folium

In [None]:
## following map works, commenting out to save

In [None]:
# Display one random row of df_map as a quick look at its columns.
df_map.sample()

In [None]:
# Base layer: choropleth of the unique-application count per district.
# `cmap` (not `color`) selects the colormap used to shade `column`;
# 'oranges' is not a colour name, and the other explore() calls in this
# notebook use cmap='Oranges'.
m = by_dist.explore(
    column="n_unique",
    cmap="Oranges",
    legend=True,
    legend_kwds=dict(colorbar=True),
    tooltip=["DISTRICT", "n_unique"],
    name="Number of Applications by District",
)

# Overlay the project points on the same map, coloured by data_origin.
# `column` maps the category values to colours; `color="data_origin"` would
# instead try to use the values themselves as colour strings.
df_map.explore(
    m=m,
    column="data_origin",
    marker_kwds=dict(radius=3, fill=True),
    tooltip=["data_origin", "project_cycle", "a1_imp_agcy_city", "a2_info_proj_name"],
    tooltip_kwds=dict(labels=True),
    name="Project Locations",
)

# NOTE(review): Stamen tiles moved to Stadia Maps hosting and newer folium
# releases no longer ship 'Stamen Toner' as a built-in name -- confirm this
# basemap still loads in the target environment.
folium.TileLayer('Stamen Toner', control=True).add_to(m)
# Layer control must be added after all layers so every layer is listed.
folium.LayerControl().add_to(m)

m

In [None]:
# points = df_map.explore(
#      column="data_origin", 
#      legend=True,
#      legend_kwds=dict(colorbar=False),
#      name="Application Result",
#      tooltip=["data_origin", "project_cycle", "a1_imp_agcy_city", "a2_info_proj_name"], 
#      tooltip_kwds=dict(labels=False), 
# )

In [None]:
# points

### By Congressional Dist

In [None]:
# Display one random row of df_map to check the district columns.
df_map.sample()

In [None]:
# Congressional district shapes, reprojected to EPSG:4326.
# NOTE: the variable name is misspelled ("congesional") but is kept because
# a later cell references it under this exact name.
shapes = "https://gis.data.ca.gov/datasets/f173bfa16514414ab6130c248fdd9d28_2.geojson"
congesional_shapes = gpd.read_file(shapes).to_crs(epsg=4326)


In [None]:
## make sure it ran- it did
#congesional_shapes.explore()

In [None]:
# Count rows per distinct congressional_district value.
df_map.congressional_district.value_counts()

In [None]:
# Rows whose congressional_district contains a comma, i.e. entries listing
# more than one district (these are split on ', ' further below).
df_map>>filter(_.congressional_district.str.contains(','))

In [None]:
# Keep the identifying fields plus the three legislative-district columns.
districts = df_map >> select(
    _.project_app_id,
    _.project_cycle,
    _.data_origin,
    _.a1_imp_agcy_city,
    _.assembly_district,
    _.congressional_district,
    _.senate_district,
)

In [None]:
# Swap the 'Needs Manual Assistance' placeholder for 0 wherever it appears
# in the frame (replace is applied to every column).
districts = districts.replace(to_replace='Needs Manual Assistance', value=0)

In [None]:
# Display the districts table after the placeholder replacement.
districts

In [None]:
# cols = ['assembly_district', 'congressional_district', 'senate_district']
# for col in cols:
#     pd.concat([districts[['project_app_id']], districts[col].str.split(', ', expand=True)], axis=1)

In [None]:
#assembly_split = pd.concat([districts[['project_app_id','project_cycle','data_origin','a1_imp_agcy_city']], districts['assembly_district'].str.split(', ', expand=True)], axis=1)

In [None]:
## the following does not take into account any single values
#districts[['assembly_district_1', 'assembly_district_2', 'assembly_district_3']] = districts['assembly_district'].str.split(', ', expand=True)

In [None]:
# Cast the three district columns to str so the .str.split calls below work
# on every row (the placeholder replacement above introduced integer 0s).
# NOTE(review): casting to str typically turns missing values into the
# literal string 'nan', which a later notnull() filter will not remove.
districts = districts.astype(
    dict.fromkeys(
        ['assembly_district', 'congressional_district', 'senate_district'],
        'str',
    )
)

In [None]:
#districts.explode('senate_district')

In [None]:
# Split the comma-separated assembly districts into up to three columns.
districts[['assembly_district_1', 'assembly_district_2', 'assembly_district_3']] = (
    districts['assembly_district']
    .str.split(', ', expand=True)
    # Pad to three columns when no row lists three districts; otherwise the
    # assignment fails with "Columns must be same length as key". Rows with
    # more than three parts still raise, so nothing is silently dropped.
    .pipe(lambda parts: parts.reindex(columns=range(max(3, parts.shape[1]))))
)

In [None]:
# Split the comma-separated congressional districts into up to three columns.
districts[['congressional_district_1', 'congressional_district_2', 'congressional_district_3']] = (
    districts['congressional_district']
    .str.split(', ', expand=True)
    # Pad to three columns when no row lists three districts; otherwise the
    # assignment fails with "Columns must be same length as key". Rows with
    # more than three parts still raise, so nothing is silently dropped.
    .pipe(lambda parts: parts.reindex(columns=range(max(3, parts.shape[1]))))
)

In [None]:
# Split the comma-separated senate districts into up to three columns.
districts[['senate_district_1', 'senate_district_2', 'senate_district_3']] = (
    districts['senate_district']
    .str.split(', ', expand=True)
    # Pad to three columns when no row lists three districts; otherwise the
    # assignment fails with "Columns must be same length as key". Rows with
    # more than three parts still raise, so nothing is silently dropped.
    .pipe(lambda parts: parts.reindex(columns=range(max(3, parts.shape[1]))))
)

In [None]:
# Reshape the three split congressional columns to long form (one row per
# project/district pair), keep the identifying fields, and drop the rows
# where the split produced no value.
congressional_dist_counts = (
    districts
    >> gather(
        'measure',
        'congressional_district2',
        _.congressional_district_1,
        _.congressional_district_2,
        _.congressional_district_3,
    )
    >> select(
        _.project_app_id,
        _.project_cycle,
        _.data_origin,
        _.a1_imp_agcy_city,
        _.congressional_district2,
    )
    >> filter(_.congressional_district2.notnull())
)


In [None]:
# Display the long-form project/congressional-district table.
congressional_dist_counts

In [None]:
# Unique project_app_id count per congressional district, merged onto the
# congressional shapes via their DISTRICT column.
congressional_dist = nunique_by_geography(
    df=congressional_dist_counts,
    geodf=congesional_shapes,
    geog_col='congressional_district2',
    agg_col='project_app_id',
    geodf_mergeon_col='DISTRICT',
)

In [None]:
# Display one random row of the merged congressional result.
congressional_dist.sample()

In [None]:
# congressional_dist.explore('n_unique',
#                            cmap='Oranges', highlight=True, legend=True)

In [None]:
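# Looks like some projects are missing after the join. Possible cause: nunique_by_geography merges with how='inner', so any district value without an exact match in the shapes' DISTRICT column is dropped.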

### By Senate Dist

In [None]:
# Senate district shapes, reprojected to EPSG:4326.
shapes = "https://gis.data.ca.gov/datasets/f173bfa16514414ab6130c248fdd9d28_1.geojson"
senate_shapes = gpd.read_file(shapes).to_crs(epsg=4326)


In [None]:
## make sure it ran- it did
#senate_shapes.explore()

In [None]:
# nunique_by_geography(df_map,
#                          senate_shapes,
#                          'a2_ct_dist',
#                          'project_app_id',
#                          'DISTRICT')

In [None]:
# Display one random row of senate_shapes to inspect its columns.
senate_shapes.sample()

### By Assembly Dist

In [None]:
# Assembly district shapes, reprojected to EPSG:4326.
shapes = "https://gis.data.ca.gov/datasets/f173bfa16514414ab6130c248fdd9d28_0.geojson"
assembly_shapes = gpd.read_file(shapes).to_crs(epsg=4326)

In [None]:
## make sure it ran- it did
#assembly_shapes.explore()