# Notebook to prep functions for report

* Grouped down to the DISTRICT and the CYCLE level

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
from calitp import to_snakecase
from dla_utils import _dla_utils
from IPython.display import HTML, Markdown
from siuba import *
from shared_utils import geography_utils

import altair as alt

import _data_cleaning
import _report_utils



In [2]:
import fiona

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON, LegendControl
from ipywidgets import Text, HTML

In [3]:
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/atp/'


In [4]:
## read in data

In [5]:
df_all = _report_utils.read_in_joined_data()



In [6]:
pd.set_option("display.max_columns",500)

In [7]:
## paramaters cell
district = 4

In [8]:
## parameters cell
cycle = 5

In [9]:
## subset df to just the district
#df = df_all>>filter(_.a2_ct_dist==district)

In [10]:
df = df_all.copy()

In [11]:
#df = df>>filter(_.project_cycle==cycle)

In [12]:
mapsubset_cols= ['awarded','project_app_id', 'project_cycle', 'data_origin', 'geometry',
               'a1_imp_agcy_city','a1_imp_agcy_name','a1_proj_partner_agcy', 
               'assembly_district','congressional_district','senate_district',
              'a2_county', 'a2_ct_dist','a2_info_proj_name','a3_proj_type', 'total_atp_$', 'a2_proj_lat','a2_proj_long']

In [13]:
df_map = _report_utils.fix_geom_issues(df, mapsubset_cols)

In [14]:
# #check where everything is mapped
# df = df_all.copy()

In [15]:
display(HTML("<h2>Quick Stats</h2>"))

display(HTML(f"Out of {len(df)} Active Transportation Program Project Applications, "
            f"there are <strong>{(df>>filter(_.awarded=='Y')).project_app_id.nunique()} "
            f"projects</strong> that recieved funding over "
            f"{df.project_cycle.nunique()} cycles"))


HTML(value='<h2>Quick Stats</h2>')

HTML(value='Out of 884 Active Transportation Program Project Applications, there are <strong>49 projects</stro…

In [16]:
display(HTML(f"<h3> What were the application outcomes "
             f"for District {district} "
             f"in Cycle {cycle}?</h3>"))
display(HTML(_dla_utils.pretify_tables((df>>count(_.data_origin)))))

HTML(value='<h3> What were the application outcomes for District 4 in Cycle 5?</h3>')

HTML(value='<style type="text/css">\n#T_e4dac th {\n  text-align: center;\n}\n#T_e4dac_row0_col0, #T_e4dac_row…

In [17]:
quick_view = df>>filter(_.awarded=="Y")>>select(_.data_origin, _.a1_imp_agcy_name, _.a2_info_proj_name, 
                                   _.a2_county, _.total_project_cost)

In [18]:
quick_view['total_project_cost'] = quick_view['total_project_cost'].map('$ {:0,.2f}'.format)

In [19]:
display(HTML("<h3> Funded Projects </h3>"))
display(HTML(_dla_utils.pretify_tables(quick_view)))

HTML(value='<h3> Funded Projects </h3>')

HTML(value='<style type="text/css">\n#T_e033c th {\n  text-align: center;\n}\n#T_e033c_row0_col0, #T_e033c_row…

In [20]:
#df>>group_by(_.awarded)>>count(_.a2_county)>>arrange(_.a2_county)

## Mapping

In [21]:
## offloading map function development to new notebook

In [22]:
## offloading cells correcting geometries for mapping 
## UPDATE: now fixed

## Metrics

In [23]:
unique_agencies = df>>group_by(_.project_cycle, _.a2_county, _.data_origin)>>summarize(n_unique_agency= _.a1_imp_agcy_name.nunique())

In [24]:
unique_agencies =(spread(unique_agencies, "data_origin", "n_unique_agency"))

In [25]:
unique_agencies = unique_agencies.rename(columns={"a2_county":"county_name"})

In [26]:
unique_agencies['Application'].fillna(0, inplace=True)
unique_agencies['Funded'].fillna(0, inplace=True)

In [27]:
unique_agencies['Application'] = unique_agencies['Application'].astype('int32')
unique_agencies['Funded'] = unique_agencies['Funded'].astype('int32')

In [28]:
display(HTML("<h3>Number of Unique Agencies By County</h3>"))
display(HTML(_dla_utils.pretify_tables(unique_agencies)))


HTML(value='<h3>Number of Unique Agencies By County</h3>')

HTML(value='<style type="text/css">\n#T_82aa5 th {\n  text-align: center;\n}\n#T_82aa5_row0_col0, #T_82aa5_row…

### Success Rates

In [29]:
df = (_report_utils.reorder_namecol(df,
                    og_name_col= 'a1_imp_agcy_name',
                    new_name_col= 'imp_agency_name_new', 
                    split_on = ", ",
                   order_on ='pt2_pt1'))



In [30]:
successes = (df>>group_by(_.awarded)>>count(_.imp_agency_name_new))>>spread("awarded", "n")>>arrange(-_.Y)

In [31]:
successes['total'] = (successes['N'] + successes['Y'])

In [32]:
successes['success_rate'] = (successes['Y']/successes['total'])

In [33]:
successes = successes.rename(columns={"imp_agency_name_new":"Implementing Agency", "N":"Projects Not Funded",
                                    "Y":"Funded Projects", "total":"Total Applications"})

In [34]:
successes_top = successes>>filter(_.success_rate>0)

In [35]:
successes_top['success_rate'] = successes_top['success_rate'].transform(lambda x: '{:,.2%}'.format(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [36]:
display(HTML("<h3>Application Success Rate</h3>"))

display(HTML(f"There are <strong>{len(successes>>filter(_.success_rate==0))}</strong> "
            f"implenting agencies with <strong> zero </strong>"
            f"successful applications."))

display(HTML(f"There are <strong>{len(successes>>filter(_.success_rate!=0))}</strong> "
            f"implenting agencies with <strong> one of more </strong>"
            f"successful applications."))
    
display(HTML("</br><h4> Success Rates for Agencies with Successful Applications </h4>"))
display(HTML(_dla_utils.pretify_tables(successes_top>>select(_['Implementing Agency'], _['Total Applications'], _.success_rate))))

HTML(value='<h3>Application Success Rate</h3>')

HTML(value='There are <strong>296</strong> implenting agencies with <strong> zero </strong>successful applicat…

HTML(value='There are <strong>42</strong> implenting agencies with <strong> one of more </strong>successful ap…

HTML(value='</br><h4> Success Rates for Agencies with Successful Applications </h4>')

HTML(value='<style type="text/css">\n#T_f5bef th {\n  text-align: center;\n}\n#T_f5bef_row0_col0, #T_f5bef_row…