# Notebook to prep functions for report

* Grouped down to the DISTRICT and the CYCLE level

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
from calitp import to_snakecase
from dla_utils import _dla_utils
from IPython.display import HTML, Markdown
from siuba import *
from shared_utils import geography_utils

import altair as alt

import _data_cleaning
import _report_utils



In [2]:
import fiona

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON, LegendControl
from ipywidgets import Text, HTML

In [3]:
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/atp/'


In [4]:
df_all = _report_utils.read_in_joined_data()



In [5]:
pd.set_option("display.max_columns",500)

In [6]:
## paramaters cell
district = 4

In [7]:
## parameters cell
cycle = 5

In [8]:
## subset df to just the district
#df = df_all>>filter(_.a2_ct_dist==district)

In [9]:
df = df_all.copy()

In [10]:
df = df>>filter(_.project_cycle==cycle)

In [11]:
# #check where everything is mapped
# df = df_all.copy()

In [12]:
display(HTML("<h2>Quick Stats</h2>"))

display(HTML(f"Out of {len(df)} Active Transportation Program Project Applications, "
            f"there are <strong>{(df>>filter(_.awarded=='Y')).project_app_id.nunique()} "
            f"projects</strong> that recieved funding over "
            f"{df.project_cycle.nunique()} cycles"))


HTML(value='<h2>Quick Stats</h2>')

HTML(value='Out of 450 Active Transportation Program Project Applications, there are <strong>49 projects</stro…

In [13]:
display(HTML(f"<h3> What were the application outcomes "
             f"for District {district} "
             f"in Cycle {cycle}?</h3>"))
display(HTML(_dla_utils.pretify_tables((df>>count(_.data_origin)))))

HTML(value='<h3> What were the application outcomes for District 4 in Cycle 5?</h3>')

HTML(value='<style type="text/css">\n#T_e9a1d th {\n  text-align: center;\n}\n#T_e9a1d_row0_col0, #T_e9a1d_row…

In [14]:
quick_view = df>>filter(_.awarded=="Y")>>select(_.data_origin, _.a1_imp_agcy_name, _.a2_info_proj_name, 
                                   _.a2_county, _.total_project_cost)

In [15]:
quick_view['total_project_cost'] = quick_view['total_project_cost'].map('$ {:0,.2f}'.format)

In [16]:
display(HTML("<h3> Funded Projects </h3>"))
display(HTML(_dla_utils.pretify_tables(quick_view)))

HTML(value='<h3> Funded Projects </h3>')

HTML(value='<style type="text/css">\n#T_9fcbb th {\n  text-align: center;\n}\n#T_9fcbb_row0_col0, #T_9fcbb_row…

In [17]:
#df>>group_by(_.awarded)>>count(_.a2_county)>>arrange(_.a2_county)

## Mapping

In [18]:
df_map= (df >>select(_.awarded, _.project_app_id, _.project_cycle, _.data_origin, _.geometry,
               _.a1_imp_agcy_city, _.a1_imp_agcy_name, _.a1_proj_partner_agcy, 
               _.assembly_district, _.congressional_district, _.senate_district,
              _.a2_county, _.a2_info_proj_descr, _.a2_info_proj_loc, _.a2_info_proj_name,
               _.a2_mop_uza_population, _.a2_mpo, _.a1_imp_agcy_street, _.a3_proj_type, 
               _.a3_proj_type, _['total_atp_$'], _.a2_proj_lat, _.a2_proj_long))

In [19]:
df_map = df_map>>filter(_.project_cycle==5)

In [20]:
#df_map>>filter(_.geometry.isnull())

In [21]:
## reapply geometry col for lat long
df_map = (geography_utils.create_point_geometry(df_map, longitude_col = 'a2_proj_long', latitude_col = 'a2_proj_lat'))

#### Flag for Incorrect Coordinates

In [22]:
## checking which is in/out of state bounds using

In [23]:
ca_bounds = gpd.read_parquet('gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/ca_boundary.parquet')

In [24]:
ca_bounds

Unnamed: 0,State,geometry
0,CA,"MULTIPOLYGON (((-122.52997 37.81541, -122.5300..."


In [25]:
#ca_bounds.explore()  

In [26]:
#gpd.overlay(ca_bounds, df_map3, how='difference')
# df_map3['in_ca'] = ""
# df_map3['in_ca'] = 
#df_map3.overlay(ca_bounds, how = "symmetric_difference")

In [27]:
(ca_bounds.contains(df_map))



0      False
0       True
1      False
1      False
2      False
       ...  
443    False
444    False
445    False
446    False
447    False
Length: 450, dtype: bool

In [28]:
joined = (df_map.sjoin(ca_bounds, how='left'))

In [29]:
joined.State.value_counts()

CA    320
Name: State, dtype: int64

In [30]:
len(joined>>filter(_.State.isna()))

130

In [31]:
(joined>>filter(_.State.isna()))

Unnamed: 0,awarded,project_app_id,project_cycle,data_origin,geometry,a1_imp_agcy_city,a1_imp_agcy_name,a1_proj_partner_agcy,assembly_district,congressional_district,senate_district,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a1_imp_agcy_street,a3_proj_type,total_atp_$,a2_proj_lat,a2_proj_long,index_right,State
325,N,"6-Kingsburg, City of-1",5,Application,POINT (119.56000 36.51000),Kingsburg,"Kingsburg, City of",,31,21,12,Fresno,Roosevelt and Reagan Elementary School Pedestr...,Local streets surrounding Roosevelt and Reagan...,Roosevelt & Reagan Elementary Safe Routes to S...,Project is located outside one of the ten larg...,COFCG,1401 Draper Street,Infrastructure - Small,,36.51,119.56,,
89,N,"6-Delano, City of-2",5,Application,POINT (119.24710 35.76880),Delano,"Delano, City of",,32,21,14,Kern,"Construct 6,547 feet of new 4.5 ft. wide sidew...","Project is located in Delano, a severely disad...",ATP-5 Bike Lane and Sidewalk Gap Improvement P...,Project is located within one of the ten large...,KCOG,1015 Eleventh Avenue,Infrastructure + NI - Small,,35.77,119.25,,
271,N,"6-Fowler, City of-1",5,Application,POINT (119.68000 36.64000),Fowler,"Fowler, City of",,31,21,16,Fresno,Fremont Elementary/Marshall Elementary/Fowler ...,Adams Avenue between 7th Street and Temperance...,Fremont Elementary/ Marshall Elementaryl/Fowle...,Project is located outside one of the ten larg...,COFCG,128 South Fifth Street,Infrastructure - Small,,36.64,119.68,,
378,N,"7-Avalon, City of-1",5,Application,POINT (118.33000 33.34000),Avalon,"Avalon, City of",,70,47,26,Los Angeles,"Conversion of the ""Five-Corners"" intersections...",Intersection of Tremont Street-Sumner Avenue-A...,Tremont Five Corners School Safety Roundabouts,Project is located within one of the ten large...,SCAG,410 Avalon Canyon Road/PO Box 707,Infrastructure + NI - Medium,,33.34,118.33,,
382,N,"7-Baldwin Park, City of-1",5,Application,POINT (117.96491 43.10680),Baldwin Park,"Baldwin Park, City of",,48,32,22,Los Angeles,"Install Traffic Control Devices, HAWK System S...",Baldwin Park Boulevard from City limit (250 ft...,Baldwin Park Blvd Bike Lane Project,Project is located within one of the ten large...,SCAG,14403 E. Pacific Ave,Infrastructure - Medium,,43.11,117.96,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,Y,11-San Diego Association of Governments (SANDA...,5.00,Funded,POINT (1170624.00000 324511.00000),San Diego,San Diego Association of Governments (SANDAG),City of San Diego,"78, 80","51, 53",40,San Diego,Buffered bike lanes; traffic calming and ped a...,The Orange Family Friendly Street Project is 2...,Orange Family Friendly Street Project,Project is located within one of the ten large...,SANDAG,"401 B Street, Suite 800",,4317000.00,324511.00,1170624.00,,
29,Y,4-Contra Costa County-2,5.00,Funded,POINT (121.94196 38.02416),Martinez,Contra Costa County,,14,11,7,Contra Costa,"Construct two-way cycle track, ADA-compliant c...","Bailey Road in unincorporated Bay Point, bound...",North Bailey Road Active Transportation Corridor,Project is located within one of the ten large...,MTC,255 Glacier Drive,,6159000.00,38.02,121.94,,
28,Y,10-Mariposa County-2,5.00,Funded,POINT (119.97080 37.48700),Mariposa,Mariposa County,,5,4,8,Mariposa,The Mariposa Creek Parkway provides an off-str...,The project is located along Mariposa Creek in...,Mariposa Creek Parkway,Project is located outside one of the ten larg...,MCTC,4639 Ben Hur Road,,4415000.00,37.49,119.97,,
1,Y,"7-South El Monte, City of-1",5.00,Funded,POINT (118.04670 34.05200),South El Monte,"South El Monte, City of",,57,38,22,Los Angeles,This project focuses on school and pedestrian ...,The project is fully in the City of South El M...,South El Monte Safe Routes to School Pedestria...,Project is located within one of the ten large...,SCAG,1415 Santa Anita Avenue,,1637000.00,34.05,118.05,,


In [32]:
_report_utils.check_point_in_state(joined,
                        'State',
                        'CA')

Unnamed: 0,awarded,project_app_id,project_cycle,data_origin,geometry,a1_imp_agcy_city,a1_imp_agcy_name,a1_proj_partner_agcy,assembly_district,congressional_district,senate_district,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a1_imp_agcy_street,a3_proj_type,total_atp_$,a2_proj_lat,a2_proj_long,index_right,State,point_check
180,N,03-El Dorado County-1,5,Application,POINT (-120.82612 38.70348),Placerville,El Dorado County,,5,4,1,El Dorado,Construct a Class I multi-use grade-separated ...,On Missouri Flat Rd. between Golden Center Dr....,El Dorado Trail / Missouri Flat Road Pedestria...,Project is located within one of the ten large...,SACOG,2850 Fairlane Court,Infrastructure - Medium,,38.70,-120.83,0.00,CA,Point In State
99,N,"6-Fresno, City of-2",5,Application,POINT (-119.80835 36.75038),Fresno,"Fresno, City of",,"23, 31",16,Needs Manual Assistance,Fresno,Palm and Belmont: 3.5 miles of Class IV with C...,Bikeway on Palm Avenue: just north of Dakota A...,Palm and Belmont Protected Bikeway Project,Project is located within one of the ten large...,COFCG,2600 Fresno Street,Infrastructure - Small,,36.75,-119.81,0.00,CA,Point In State
211,N,"6-Fresno, City of-3",5,Application,POINT (-119.77249 36.76316),Fresno,"Fresno, City of",,31,16,Needs Manual Assistance,Fresno,"Install signal/scramble at Chestnut & Weldon, ...",The intersections of Chestnut and Weldon and F...,"Cross, Walk & Roll! SRTS in Central Fresno",Project is located within one of the ten large...,COFCG,2600 Fresno Street,Infrastructure + NI - Small,,36.76,-119.77,0.00,CA,Point In State
275,N,6-Kern Council of Governments-1,5,Application,POINT (-119.01526 35.37227),Bakersfield,Kern Council of Governments,,"32, 34, 36","21, 23","14, 16",Kern,Identified disadvantaged communities in Kern C...,"DACs in Arvin, Bakersfield, California City, D...",Safe Routes for Cyclists in Kern County's Disa...,Project is located within one of the ten large...,KCOG,"1401 19th Street, Suite 300",Non-Infrastructure,,35.37,-119.02,0.00,CA,Point In State
415,N,6-Kern County - D6-1,5,Application,POINT (-118.89088 35.43151),Bakersfield,Kern County - D6,,34,23,16,Kern,Rehabilitate & resurface the Kern River Parkwa...,Kern River Parkway Multi-Use Path between Mano...,Kern River Parkway Multi-Use Path Safety Impro...,Project is located within one of the ten large...,KCOG,"2700 M Street, Suite 400",Infrastructure - Small,,35.43,-118.89,0.00,CA,Point In State
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,Y,"7-Los Angeles, City of-5",5.00,Funded,POINT (-118.45719 34.22689),Los Angeles,"Los Angeles, City of",,46,29,18,Los Angeles,Transform one of the City’s most traffic-stres...,The one-quarter mile radius around the Panoram...,SRTS Panorama City Elementary School Project,Project is located within one of the ten large...,SCAG,"100 S. Main Street, 9th Floor",,6149000.00,34.23,-118.46,0.00,CA,Point In State
41,Y,"6-Huron, City of-1",5.00,Funded,POINT (-120.10304 36.21027),Huron,"Huron, City of",,31,21,12,Fresno,"The project will add Class II, III and IV bike...",The proposed project will take place on Lassen...,City of Huron Bicyclist and Pedestrian Safety ...,Project is located within one of the ten large...,COFCG,PO Box 339,,1769000.00,36.21,-120.10,0.00,CA,Point In State
38,Y,10-Tuolumne County-1,5.00,Funded,POINT (120.23264 37.58579),Sonora,Tuolumne County,,8,4,8,Tuolumne,"Project includes approximately 4,200 linear fe...","On 5th Ave, from Jamestown Rd, cross 108 to 7t...",Jamestown Community Connectivity Project,Project is located outside one of the ten larg...,Caltrans,2 S. Green Street,,2071000.00,37.59,120.23,,,Point Not In State
40,Y,"7-Long Beach, City of-1",5.00,Funded,POINT (-118.19233 33.77071),Long Beach,"Long Beach, City of",,70,47,33,Los Angeles,Transform Downtown Long Beach into a pedestria...,Select intersections in Downtown Long Beach (b...,Downtown Long Beach Walkable Corners,Project is located within one of the ten large...,SCAG,"411 West Ocean Boulevard, 4th Floor",,7893000.00,33.77,-118.19,0.00,CA,Point In State


In [33]:
joined>>filter(_.a2_proj_lat>300)

Unnamed: 0,awarded,project_app_id,project_cycle,data_origin,geometry,a1_imp_agcy_city,a1_imp_agcy_name,a1_proj_partner_agcy,assembly_district,congressional_district,senate_district,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a1_imp_agcy_street,a3_proj_type,total_atp_$,a2_proj_lat,a2_proj_long,index_right,State,point_check
283,N,"7-Pasadena, City of-1",5.0,Application,POINT (1883071.21000 6516102.76300),Pasadena,"Pasadena, City of",,41,27,25,Los Angeles,The goal of the project is to develop ATP for ...,"Northwest Pasadena: 210 Freeway to the South, ...",Northwest Pasadena Active Transportation Plan ...,Project is located within one of the ten large...,SCAG,221 East Walnut Suite 210,Plan,,6516102.76,1883071.21,,,Point Not In State
260,N,"7-Pasadena, City of-2",5.0,Application,POINT (1883071.21000 6516102.76300),Pasadena,"Pasadena, City of",,41,27,25,Los Angeles,Replacement of existing standard crosswalks wi...,"Northwest Pasadena: 210 Freeway to the south, ...",Northwest Pasadena Continental Crosswalk Imple...,Project is located within one of the ten large...,SCAG,221 East Walnut Street,Infrastructure - Small,,6516102.76,1883071.21,,,Point Not In State
13,Y,11-San Diego Association of Governments (SANDA...,5.0,Funded,POINT (1170624.00000 324511.00000),San Diego,San Diego Association of Governments (SANDAG),City of San Diego,"78, 80","51, 53",40,San Diego,Buffered bike lanes; traffic calming and ped a...,The Orange Family Friendly Street Project is 2...,Orange Family Friendly Street Project,Project is located within one of the ten large...,SANDAG,"401 B Street, Suite 800",,4317000.0,324511.0,1170624.0,,,Point Not In State


In [34]:
joined2 = joined>>filter(_.a2_proj_lat<300)

In [35]:
## map shows multiple points out of CA
#joined2.explore("point_check", cmap="tab20b")  

In [36]:
joined2.point_check.value_counts()

Point In State        320
Point Not In State    127
Name: point_check, dtype: int64

In [37]:
(joined2>>filter(_.point_check=='Point Not In State')
 >>select(_.data_origin, _.project_app_id,_.a1_imp_agcy_city, _.geometry, _.a2_proj_lat, _.a2_proj_long)
 >>arrange(_.a2_proj_long)).iloc[1:50]

Unnamed: 0,data_origin,project_app_id,a1_imp_agcy_city,geometry,a2_proj_lat,a2_proj_long
28,Application,1-Round Valley Indians Tribe-1,Covelo,POINT (-125.26222 39.79472),39.79,-125.26
214,Application,5-San Luis Obispo County-2,San Luis Obispo,POINT (-120.87421 31.41508),31.42,-120.87
351,Application,"7-Cerritos, City of-2",Cerritos,POINT (-118.04340 33.50450),33.5,-118.04
23,Funded,"11-National City, City of-3",National City,POINT (32.65651 32.65651),32.66,32.66
186,Application,"8-Needles, City of-1",Needles,POINT (114.36110 34.50040),34.5,114.36
402,Application,"11-National City, City of-1",National City,POINT (117.09179 32.68752),32.69,117.09
277,Application,"11-Chula Vista, City of-1",Chula Vista,POINT (117.09559 32.63669),32.64,117.1
43,Funded,"11-Imperial Beach, City of-1",Imperial Beach,POINT (117.11441 32.57646),32.58,117.11
247,Application,8-Riverside County-3,Riverside,POINT (117.13044 33.82075),33.82,117.13
241,Application,"11-San Diego, City of-4",San Diego,POINT (117.16100 32.71500),32.72,117.16


* most longitudes are positive, when they should be negative, resulting in the longitudes being in east Asia

#### QUICK FIX for wrong longs
* funded data lost mapping data from geometry column

In [38]:
df_map_correct = (joined2>>filter(_.a2_proj_long<0))

In [39]:
df_map_incorrect = (joined2>>filter(_.a2_proj_long>0))

In [40]:
df_map_incorrect['a2_proj_long'] = df_map_incorrect['a2_proj_long'] * (-1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [41]:
df_map_corrected = (geography_utils.create_point_geometry(df_map_incorrect, longitude_col = 'a2_proj_long', latitude_col = 'a2_proj_lat'))

In [42]:
df_map2 = (pd.concat([df_map_corrected, df_map_correct]))

In [43]:
df_map2 = df_map2.drop(columns=['index_right','State','point_check']) 

In [44]:
joined3 = (df_map2.sjoin(ca_bounds, how='left'))

In [45]:
joined3 = _report_utils.check_point_in_state(joined3,
                        'State',
                        'CA')

In [46]:
joined3.point_check.value_counts()

Point In State        437
Point Not In State     10
Name: point_check, dtype: int64

* after changing the positive longitudes to negative, the number of points <strong>"Not In State"</strong> went from 127 to 10

In [47]:
#check again 
joined3.explore("point_check", cmap="tab20b")  

In [48]:
# still have some weird locations but better than before

In [49]:
## going back to just district

In [50]:
need_help = (joined3>>filter(_.point_check=='Point Not In State') 
 >>select(_.data_origin, _.project_app_id, _.a1_imp_agcy_city, _.geometry, _.a2_proj_lat, _.a2_proj_long)
 >>arrange(_.a2_proj_long)).reset_index()

In [51]:
need_help

Unnamed: 0,index,data_origin,project_app_id,a1_imp_agcy_city,geometry,a2_proj_lat,a2_proj_long
0,73,Application,9-California Department of Transportation-6,Bishop,POINT (-188.42980 37.36138),37.36,-188.43
1,232,Application,"8-Jurupa Valley, City of-1",Jurupa Valley,POINT (-177.42278 33.99897),34.0,-177.42
2,28,Application,1-Round Valley Indians Tribe-1,Covelo,POINT (-125.26222 39.79472),39.79,-125.26
3,214,Application,5-San Luis Obispo County-2,San Luis Obispo,POINT (-120.87421 31.41508),31.42,-120.87
4,316,Application,"7-Culver City, City of-2",Culver City,POINT (-118.10346 33.47235),33.47,-118.1
5,351,Application,"7-Cerritos, City of-2",Cerritos,POINT (-118.04340 33.50450),33.5,-118.04
6,382,Application,"7-Baldwin Park, City of-1",Baldwin Park,POINT (-117.96491 43.10680),43.11,-117.96
7,155,Application,"11-Encinitas, City of-1",Encinitas,POINT (-117.28330 32.02250),32.02,-117.28
8,186,Application,"8-Needles, City of-1",Needles,POINT (-114.36110 34.50040),34.5,-114.36
9,23,Funded,"11-National City, City of-3",National City,POINT (-32.65651 32.65651),32.66,-32.66


In [52]:
### adding in census geojson
### following code does not work

#shape = gpd.read_file('gs://calitp-analytics-data/data-analyses/dla/atp/CA_Places_TIGER2016.shp')
#census_bound = gpd.read_file('gs://calitp-analytics-data/data-analyses/census_boundaries/boundary_tract_2018.geojson')

In [53]:
places = "https://data.ca.gov/dataset/e212e397-1277-4df3-8c22-40721b095f33/resource/436fc714-831c-4070-b44b-b06dcde6bf18/download/ca-places-boundaries.zip"
places_ca = gpd.read_file(places)

In [54]:
places_ca = places_ca>>select(_.NAME, _.NAMELSAD, _.INTPTLAT, _.INTPTLON, _.geometry)

In [55]:
places_ca['geometry2'] = places_ca['geometry'].centroid

In [56]:
places_ca.sample(4)

Unnamed: 0,NAME,NAMELSAD,INTPTLAT,INTPTLON,geometry,geometry2
860,Graniteville,Graniteville CDP,39.4443656,-120.7360303,"POLYGON ((-13442540.754 4783484.331, -13442535...",POINT (-13440274.641 4785525.509)
90,Portola,Portola city,39.820799,-120.474293,"MULTIPOLYGON (((-13410323.892 4839537.975, -13...",POINT (-13411022.309 4840421.488)
455,Fort Bragg,Fort Bragg city,39.4410694,-123.8036201,"MULTIPOLYGON (((-13779179.962 4785501.483, -13...",POINT (-13781733.462 4785109.405)
494,Alleghany,Alleghany CDP,39.4666599,-120.8411238,"POLYGON ((-13452813.096 4788380.765, -13452806...",POINT (-13451973.851 4788739.611)


In [57]:
##check points to make sure 
#places_ca.explore("NAME")  

In [58]:
# places_geom = dict(zip(places_ca['NAME'], 
#                           places_ca['geometry2']))

In [59]:
#need_help['geometry3'] = df['a1_imp_agcy_city'].map(places_geom)

In [60]:
places_ca= places_ca>>select(_.NAME, _.NAMELSAD, _.INTPTLAT, _.INTPTLON, _.geometry2)

In [61]:
places_ca.sample()

Unnamed: 0,NAME,NAMELSAD,INTPTLAT,INTPTLON,geometry2
1346,Sleepy Hollow,Sleepy Hollow CDP,38.0120356,-122.5876053,POINT (-13646391.710 4581127.146)


In [62]:
need_help2 = ((pd.merge(need_help, places_ca,
           how = 'left',
           left_on='a1_imp_agcy_city',
           right_on='NAME'))>>select(_.data_origin, _.project_app_id, _.a1_imp_agcy_city, _.NAME,
                                     _.NAMELSAD,_.INTPTLAT,_.INTPTLON))

In [63]:
need_help2

Unnamed: 0,data_origin,project_app_id,a1_imp_agcy_city,NAME,NAMELSAD,INTPTLAT,INTPTLON
0,Application,9-California Department of Transportation-6,Bishop,Bishop,Bishop city,37.3663813,-118.3958082
1,Application,"8-Jurupa Valley, City of-1",Jurupa Valley,Jurupa Valley,Jurupa Valley city,34.0025907,-117.4676122
2,Application,1-Round Valley Indians Tribe-1,Covelo,Covelo,Covelo CDP,39.8001872,-123.2526012
3,Application,5-San Luis Obispo County-2,San Luis Obispo,San Luis Obispo,San Luis Obispo city,35.263954,-120.661126
4,Application,"7-Culver City, City of-2",Culver City,Culver City,Culver City city,34.0058204,-118.3967807
5,Application,"7-Cerritos, City of-2",Cerritos,Cerritos,Cerritos city,33.8677431,-118.0694719
6,Application,"7-Baldwin Park, City of-1",Baldwin Park,Baldwin Park,Baldwin Park city,34.0828245,-117.9712858
7,Application,"11-Encinitas, City of-1",Encinitas,Encinitas,Encinitas city,33.0506323,-117.2636163
8,Application,"8-Needles, City of-1",Needles,Needles,Needles city,34.8135697,-114.6253021
9,Funded,"11-National City, City of-3",National City,National City,National City city,32.6658617,-117.097361


In [64]:
need_help2 = (geography_utils.create_point_geometry(need_help2,
                                                    longitude_col = 'INTPTLON',
                                                    latitude_col = 'INTPTLAT'))

In [65]:
need_help2

Unnamed: 0,data_origin,project_app_id,a1_imp_agcy_city,NAME,NAMELSAD,INTPTLAT,INTPTLON,geometry
0,Application,9-California Department of Transportation-6,Bishop,Bishop,Bishop city,37.3663813,-118.3958082,POINT (-118.39581 37.36638)
1,Application,"8-Jurupa Valley, City of-1",Jurupa Valley,Jurupa Valley,Jurupa Valley city,34.0025907,-117.4676122,POINT (-117.46761 34.00259)
2,Application,1-Round Valley Indians Tribe-1,Covelo,Covelo,Covelo CDP,39.8001872,-123.2526012,POINT (-123.25260 39.80019)
3,Application,5-San Luis Obispo County-2,San Luis Obispo,San Luis Obispo,San Luis Obispo city,35.263954,-120.661126,POINT (-120.66113 35.26395)
4,Application,"7-Culver City, City of-2",Culver City,Culver City,Culver City city,34.0058204,-118.3967807,POINT (-118.39678 34.00582)
5,Application,"7-Cerritos, City of-2",Cerritos,Cerritos,Cerritos city,33.8677431,-118.0694719,POINT (-118.06947 33.86774)
6,Application,"7-Baldwin Park, City of-1",Baldwin Park,Baldwin Park,Baldwin Park city,34.0828245,-117.9712858,POINT (-117.97129 34.08282)
7,Application,"11-Encinitas, City of-1",Encinitas,Encinitas,Encinitas city,33.0506323,-117.2636163,POINT (-117.26362 33.05063)
8,Application,"8-Needles, City of-1",Needles,Needles,Needles city,34.8135697,-114.6253021,POINT (-114.62530 34.81357)
9,Funded,"11-National City, City of-3",National City,National City,National City city,32.6658617,-117.097361,POINT (-117.09736 32.66586)


In [66]:
## mapping to check if the points match. it does.
need_help2.explore("data_origin")

### Adding Flag for corrected geometries

In [67]:
#### Check what the geometry looks like for the points that do not fall within CA State Bounds

In [68]:
joined>>filter(_.point_check=='Point Not In State')>>select(_.awarded, _.geometry, _.a1_imp_agcy_city,
                                                            _.a1_imp_agcy_name, _.a2_county, _.a2_proj_lat,
                                                            _.a2_proj_long, _.index_right, _.State, _.point_check)>>arrange(_.a2_proj_lat)


Unnamed: 0,awarded,geometry,a1_imp_agcy_city,a1_imp_agcy_name,a2_county,a2_proj_lat,a2_proj_long,index_right,State,point_check
214,N,POINT (-120.87421 31.41508),San Luis Obispo,San Luis Obispo County,San Luis Obispo,31.42,-120.87,,,Point Not In State
155,N,POINT (117.28330 32.02250),Encinitas,"Encinitas, City of",San Diego,32.02,117.28,,,Point Not In State
43,Y,POINT (117.11441 32.57646),Imperial Beach,"Imperial Beach, City of",San Diego,32.58,117.11,,,Point Not In State
277,N,POINT (117.09559 32.63669),Chula Vista,"Chula Vista, City of",San Diego,32.64,117.10,,,Point Not In State
23,Y,POINT (32.65651 32.65651),National City,"National City, City of",San Diego,32.66,32.66,,,Point Not In State
...,...,...,...,...,...,...,...,...,...,...
317,N,POINT (120.17330 41.52910),Alturas,Modoc County,Modoc,41.53,120.17,,,Point Not In State
382,N,POINT (117.96491 43.10680),Baldwin Park,"Baldwin Park, City of",Los Angeles,43.11,117.96,,,Point Not In State
13,Y,POINT (1170624.00000 324511.00000),San Diego,San Diego Association of Governments (SANDAG),San Diego,324511.00,1170624.00,,,Point Not In State
283,N,POINT (1883071.21000 6516102.76300),Pasadena,"Pasadena, City of",Los Angeles,6516102.76,1883071.21,,,Point Not In State


## Metrics

In [69]:
unique_agencies = df>>group_by(_.project_cycle, _.a2_county, _.data_origin)>>summarize(n_unique_agency= _.a1_imp_agcy_name.nunique())

In [70]:
unique_agencies =(spread(unique_agencies, "data_origin", "n_unique_agency"))

In [71]:
unique_agencies = unique_agencies.rename(columns={"a2_county":"county_name"})

In [72]:
unique_agencies['Application'].fillna(0, inplace=True)
unique_agencies['Funded'].fillna(0, inplace=True)

In [73]:
unique_agencies['Application'] = unique_agencies['Application'].astype('int32')
unique_agencies['Funded'] = unique_agencies['Funded'].astype('int32')

In [74]:
display(HTML("<h3>Number of Unique Agencies By County</h3>"))
display(HTML(_dla_utils.pretify_tables(unique_agencies)))


HTML(value='<h3>Number of Unique Agencies By County</h3>')

HTML(value='<style type="text/css">\n#T_f54da th {\n  text-align: center;\n}\n#T_f54da_row0_col0, #T_f54da_row…

### Success Rates

In [75]:
df = (_report_utils.reorder_namecol(df,
                    og_name_col= 'a1_imp_agcy_name',
                    new_name_col= 'imp_agency_name_new', 
                    split_on = ", ",
                   order_on ='pt2_pt1'))



In [76]:
successes = (df>>group_by(_.awarded)>>count(_.imp_agency_name_new))>>spread("awarded", "n")>>arrange(-_.Y)

In [77]:
successes['total'] = (successes['N'] + successes['Y'])

In [78]:
successes['success_rate'] = (successes['Y']/successes['total'])

In [79]:
successes = successes.rename(columns={"imp_agency_name_new":"Implementing Agency", "N":"Projects Not Funded",
                                    "Y":"Funded Projects", "total":"Total Applications"})

In [80]:
successes_top = successes>>filter(_.success_rate>0)

In [81]:
successes_top['success_rate'] = successes_top['success_rate'].transform(lambda x: '{:,.2%}'.format(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [82]:
display(HTML("<h3>Application Success Rate</h3>"))

display(HTML(f"There are <strong>{len(successes>>filter(_.success_rate==0))}</strong> "
            f"implenting agencies with <strong> zero </strong>"
            f"successful applications."))

display(HTML(f"There are <strong>{len(successes>>filter(_.success_rate!=0))}</strong> "
            f"implenting agencies with <strong> one of more </strong>"
            f"successful applications."))
    
display(HTML("</br><h4> Success Rates for Agencies with Successful Applications </h4>"))
display(HTML(_dla_utils.pretify_tables(successes_top>>select(_['Implementing Agency'], _['Total Applications'], _.success_rate))))

HTML(value='<h3>Application Success Rate</h3>')

HTML(value='There are <strong>209</strong> implenting agencies with <strong> zero </strong>successful applicat…

HTML(value='There are <strong>42</strong> implenting agencies with <strong> one of more </strong>successful ap…

HTML(value='</br><h4> Success Rates for Agencies with Successful Applications </h4>')

HTML(value='<style type="text/css">\n#T_eaeb3 th {\n  text-align: center;\n}\n#T_eaeb3_row0_col0, #T_eaeb3_row…