# Notebook to prep functions for report

* Grouped down to the DISTRICT and the CYCLE level

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
from calitp import to_snakecase
from dla_utils import _dla_utils
from IPython.display import HTML, Markdown
from siuba import *
from shared_utils import geography_utils

import altair as alt

import _data_cleaning
import _report_utils



In [2]:
import fiona

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON, LegendControl
from ipywidgets import Text, HTML

In [3]:
# Base GCS folder for the DLA / ATP analysis files.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/dla/atp/"


In [4]:
## read in data

In [5]:
# Load the joined dataset through the project helper.
# NOTE(review): the helper's data source and column schema are not visible in this notebook — confirm in _report_utils.
df_all = _report_utils.read_in_joined_data()



In [6]:
# Let wide frames render up to 500 columns instead of being elided.
pd.options.display.max_columns = 500

In [7]:
## parameters cell
# District number interpolated into the section heading further down.
# The district filter on df is currently commented out, so this value does not subset the data.
district = 4

In [8]:
## parameters cell
# Cycle number interpolated into the section heading further down.
# The cycle filter on df is currently commented out, so this value does not subset the data.
cycle = 5

In [9]:
## subset df to just the district
#df = df_all>>filter(_.a2_ct_dist==district)

In [10]:
# Work on a copy so df_all stays untouched.
# The district / cycle filters in the neighbouring cells are commented out, so df holds every row of df_all.
df = df_all.copy()

In [11]:
#df = df>>filter(_.project_cycle==cycle)

In [66]:
# Columns handed to the geometry-fixing helper when building the map frame.
mapsubset_cols = [
    # application identity / status
    'awarded',
    'project_app_id',
    'project_cycle',
    'data_origin',
    'geometry',
    # implementing agency
    'a1_imp_agcy_city',
    'a1_imp_agcy_name',
    'a1_proj_partner_agcy',
    # legislative districts
    'assembly_district',
    'congressional_district',
    'senate_district',
    # project location / description
    'a2_county',
    'a2_ct_dist',
    'a2_info_proj_name',
    'a3_proj_type',
    'total_atp_$',
    'a2_proj_lat',
    'a2_proj_long',
]

In [83]:
# Build the mapping frame from df and the column list via the project helper.
# NOTE(review): helper internals are not shown here; a later cell drops an 'index_right'
# column from the result, which suggests a spatial join happens inside — confirm.
df_map = _report_utils.fix_geom_issues(df, mapsubset_cols)

In [68]:
# #check where everything is mapped
# df = df_all.copy()

In [69]:
# Headline numbers: row count of df, distinct funded project ids, and distinct cycles.
display(HTML("<h2>Quick Stats</h2>"))

# Spelling fix in the rendered sentence: "recieved" -> "received".
display(HTML(f"Out of {len(df)} Active Transportation Program Project Applications, "
            f"there are <strong>{(df>>filter(_.awarded=='Y')).project_app_id.nunique()} "
            f"projects</strong> that received funding over "
            f"{df.project_cycle.nunique()} cycles"))


HTML(value='<h2>Quick Stats</h2>')

HTML(value='Out of 884 Active Transportation Program Project Applications, there are <strong>49 projects</stro…

In [70]:
# Section heading followed by the number of rows per data_origin.
outcomes_heading = (
    f"<h3> What were the application outcomes "
    f"for District {district} "
    f"in Cycle {cycle}?</h3>"
)
display(HTML(outcomes_heading))

outcomes_by_origin = df >> count(_.data_origin)
display(HTML(_dla_utils.pretify_tables(outcomes_by_origin)))

HTML(value='<h3> What were the application outcomes for District 4 in Cycle 5?</h3>')

HTML(value='<style type="text/css">\n#T_eea36 th {\n  text-align: center;\n}\n#T_eea36_row0_col0, #T_eea36_row…

In [71]:
# Funded rows only, trimmed to the columns shown in the "Funded Projects" table.
quick_view = (
    df
    >> filter(_.awarded == "Y")
    >> select(
        _.data_origin,
        _.a1_imp_agcy_name,
        _.a2_info_proj_name,
        _.a2_county,
        _.total_project_cost,
    )
)

In [72]:
# Render the cost as a dollar string with thousands separators and two decimals.
format_dollars = '$ {:0,.2f}'.format
quick_view['total_project_cost'] = quick_view['total_project_cost'].map(format_dollars)

In [73]:
# Heading first, then the styled table of funded projects.
display(HTML("<h3> Funded Projects </h3>"))
funded_table_html = _dla_utils.pretify_tables(quick_view)
display(HTML(funded_table_html))

HTML(value='<h3> Funded Projects </h3>')

HTML(value='<style type="text/css">\n#T_fbbac th {\n  text-align: center;\n}\n#T_fbbac_row0_col0, #T_fbbac_row…

In [74]:
#df>>group_by(_.awarded)>>count(_.a2_county)>>arrange(_.a2_county)

## Mapping

In [75]:
# Interactive map of the project points, coloured by data_origin.
df_map.explore(column="data_origin", cmap="tab20b")

In [76]:
## offloading cells correcting geometries for mapping 
## UPDATE: now fixed

In [100]:
## from amanda's notebook for Project prioritization: https://github.com/cal-itp/data-analyses/blob/main/project_prioritization/_utils.py
# Read the district GeoJSON, then reproject to EPSG:4326.
shapes = "https://gis.data.ca.gov/datasets/0144574f750f4ccc88749004aca6eb0c_0.geojson?outSR=%7B%22latestWkid%22%3A3857%2C%22wkid%22%3A102100%7D"
district_shapes = gpd.read_file(shapes)
district_shapes = district_shapes.to_crs(epsg=4326)

In [104]:
#district_shapes.boundary.explore()

In [101]:
## county geography
# Read the county GeoJSON, then reproject to EPSG:4326.
shapes2 = "https://opendata.arcgis.com/datasets/8713ced9b78a4abb97dc130a691a8695_0.geojson"
county_shapes = gpd.read_file(shapes2)
county_shapes = county_shapes.to_crs(epsg=4326)

In [103]:
#county_shapes.boundary.explore()

In [91]:
# Remove the leftover 'index_right' column from the map frame.
# errors='ignore' keeps this cell re-runnable: once the column is gone, a second run
# is a no-op instead of raising KeyError.
df_map = df_map.drop(columns='index_right', errors='ignore')

In [94]:
# df_map = district_shapes.merge(
#     df_map, how="inner", left_on="DISTRICT", right_on="a2_ct_dist")

In [95]:
# Inspect the map frame after the column drop (the notebook repr shows only the first and last rows).
df_map

Unnamed: 0,project_app_id,project_cycle,awarded,data_origin,geometry,a1_imp_agcy_city,a1_imp_agcy_name,a1_proj_partner_agcy,assembly_district,congressional_district,senate_district,a2_county,a2_ct_dist,a2_info_proj_name,a2_proj_lat,a2_proj_long,a3_proj_type,total_atp_$,State,point_check
180,03-El Dorado County-1,5,N,Application,POINT (-120.82612 38.70348),Placerville,El Dorado County,,5,4,1,El Dorado,3.00,El Dorado Trail / Missouri Flat Road Pedestria...,38.70,-120.83,Infrastructure - Medium,,CA,Point In State
99,"6-Fresno, City of-2",5,N,Application,POINT (-119.80835 36.75038),Fresno,"Fresno, City of",,"23, 31",16,Needs Manual Assistance,Fresno,6.00,Palm and Belmont Protected Bikeway Project,36.75,-119.81,Infrastructure - Small,,CA,Point In State
211,"6-Fresno, City of-3",5,N,Application,POINT (-119.77249 36.76316),Fresno,"Fresno, City of",,31,16,Needs Manual Assistance,Fresno,6.00,"Cross, Walk & Roll! SRTS in Central Fresno",36.76,-119.77,Infrastructure + NI - Small,,CA,Point In State
275,6-Kern Council of Governments-1,5,N,Application,POINT (-119.01526 35.37227),Bakersfield,Kern Council of Governments,,"32, 34, 36","21, 23","14, 16",Kern,6.00,Safe Routes for Cyclists in Kern County's Disa...,35.37,-119.02,Non-Infrastructure,,CA,Point In State
415,6-Kern County - D6-1,5,N,Application,POINT (-118.89088 35.43151),Bakersfield,Kern County - D6,,34,23,16,Kern,6.00,Kern River Parkway Multi-Use Path Safety Impro...,35.43,-118.89,Infrastructure - Small,,CA,Point In State
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,"6-Delano, City of-1",5.00,Y,Funded,POINT (-119.24710 35.76880),Delano,"Delano, City of",,32,21,14,Kern,6.00,ATP-5 SRTS Intersection Enhancement and NI Wor...,35.77,-119.25,,1164000.00,CA,Point In State
29,4-Contra Costa County-2,5.00,Y,Funded,POINT (-121.94196 38.02416),Martinez,Contra Costa County,,14,11,7,Contra Costa,4.00,North Bailey Road Active Transportation Corridor,38.02,-121.94,,6159000.00,CA,Point In State
28,10-Mariposa County-2,5.00,Y,Funded,POINT (-119.97080 37.48700),Mariposa,Mariposa County,,5,4,8,Mariposa,10.00,Mariposa Creek Parkway,37.49,-119.97,,4415000.00,CA,Point In State
1,"7-South El Monte, City of-1",5.00,Y,Funded,POINT (-118.04670 34.05200),South El Monte,"South El Monte, City of",,57,38,22,Los Angeles,7.00,South El Monte Safe Routes to School Pedestria...,34.05,-118.05,,1637000.00,CA,Point In State


## Metrics

In [33]:
# Distinct implementing agencies per cycle / county / data_origin.
unique_agencies = (
    df
    >> group_by(_.project_cycle, _.a2_county, _.data_origin)
    >> summarize(n_unique_agency=_.a1_imp_agcy_name.nunique())
)

In [34]:
# Pivot wide: one column per data_origin value, holding the agency counts.
unique_agencies = unique_agencies >> spread("data_origin", "n_unique_agency")

In [35]:
# Friendlier column label for the county.
unique_agencies = unique_agencies.rename({"a2_county": "county_name"}, axis="columns")

In [36]:
# Missing counts after the spread mean "no agencies", so fill them with 0.
# Assign the filled columns back to the frame instead of calling fillna(inplace=True)
# on a selected column: that chained in-place form is deprecated in pandas 2.x and
# no longer updates the parent frame under Copy-on-Write.
count_cols = ['Application', 'Funded']
unique_agencies[count_cols] = unique_agencies[count_cols].fillna(0)

In [37]:
# Counts are whole numbers; cast both count columns to int32 in one call.
unique_agencies = unique_agencies.astype({'Application': 'int32', 'Funded': 'int32'})

In [38]:
# Heading first, then the styled table of agency counts.
display(HTML("<h3>Number of Unique Agencies By County</h3>"))
agency_table_html = _dla_utils.pretify_tables(unique_agencies)
display(HTML(agency_table_html))


HTML(value='<h3>Number of Unique Agencies By County</h3>')

HTML(value='<style type="text/css">\n#T_dd2a2 th {\n  text-align: center;\n}\n#T_dd2a2_row0_col0, #T_dd2a2_row…

### Success Rates

In [39]:
# Derive 'imp_agency_name_new' from 'a1_imp_agcy_name' via the project helper.
# NOTE(review): presumably splits on ", " and swaps the two parts ('pt2_pt1') — confirm in _report_utils.
reorder_kwargs = dict(
    og_name_col='a1_imp_agcy_name',
    new_name_col='imp_agency_name_new',
    split_on=", ",
    order_on='pt2_pt1',
)
df = _report_utils.reorder_namecol(df, **reorder_kwargs)



In [40]:
# Count rows per agency and awarded flag, pivot the flag into N / Y columns,
# and sort by the Y count, largest first.
successes = (
    df
    >> group_by(_.awarded)
    >> count(_.imp_agency_name_new)
    >> spread("awarded", "n")
    >> arrange(-_.Y)
)

In [41]:
# An agency that appears under only one `awarded` value can come out of spread() with
# NaN in the other column; NaN would then propagate into the total and the success rate.
# Treat a missing count as zero before summing.
successes[['N', 'Y']] = successes[['N', 'Y']].fillna(0)
successes['total'] = successes['N'] + successes['Y']

In [42]:
# Share of an agency's applications that were funded.
successes['success_rate'] = successes['Y'].div(successes['total'])

In [43]:
# Reader-facing column labels for the report table.
display_names = {
    "imp_agency_name_new": "Implementing Agency",
    "N": "Projects Not Funded",
    "Y": "Funded Projects",
    "total": "Total Applications",
}
successes = successes.rename(columns=display_names)

In [44]:
# Keep only agencies with at least one funded application.
successes_top = filter(successes, _.success_rate > 0)

In [45]:
# Format the rate as a percentage string (e.g. 0.5 -> '50.00%').
# assign() returns a new frame, which avoids the SettingWithCopyWarning raised when a
# column is written into the filtered successes_top; .map with a bound format method
# matches how the cost column is formatted earlier in the notebook.
successes_top = successes_top.assign(
    success_rate=successes_top['success_rate'].map('{:,.2%}'.format)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [46]:
# Summary sentences plus the table of agencies with at least one funded application.
# Text fixes: "implenting" -> "implementing", "one of more" -> "one or more",
# a space after each closing </strong>, and "</br>" -> "<br>".
display(HTML("<h3>Application Success Rate</h3>"))

display(HTML(f"There are <strong>{len(successes>>filter(_.success_rate==0))}</strong> "
            f"implementing agencies with <strong> zero </strong> "
            f"successful applications."))

# Use > 0 (not != 0) so the count matches the filter that builds successes_top and
# a NaN rate is never counted as a success.
display(HTML(f"There are <strong>{len(successes>>filter(_.success_rate>0))}</strong> "
            f"implementing agencies with <strong> one or more </strong> "
            f"successful applications."))

display(HTML("<br><h4> Success Rates for Agencies with Successful Applications </h4>"))
display(HTML(_dla_utils.pretify_tables(successes_top>>select(_['Implementing Agency'], _['Total Applications'], _.success_rate))))

HTML(value='<h3>Application Success Rate</h3>')

HTML(value='There are <strong>296</strong> implenting agencies with <strong> zero </strong>successful applicat…

HTML(value='There are <strong>42</strong> implenting agencies with <strong> one of more </strong>successful ap…

HTML(value='</br><h4> Success Rates for Agencies with Successful Applications </h4>')

HTML(value='<style type="text/css">\n#T_7f206 th {\n  text-align: center;\n}\n#T_7f206_row0_col0, #T_7f206_row…