# ATP Data Exploration

In [1]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown, HTML, Image
import ipywidgets as widgets
from ipywidgets import interact, interactive

from calitp import to_snakecase
import intake

from shared_utils import altair_utils
from shared_utils import styleguide

from dla_utils import _dla_utils



In [2]:
pd.set_option("display.max_columns", 220)


## Reading in w/o utils

In [3]:
main_details = to_snakecase(pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/atp/Main Details.xls"))
project_details = to_snakecase(pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/atp/Project Details.xls"))

In [4]:
# main_details.info()

In [5]:
# project_details.sample()

In [6]:
# project_details.project_cycle.value_counts()

In [7]:
# project_details>>count(_.project_app_id)>>filter(_.n>1)

In [8]:
# project_details>>group_by(_.project_cycle)>>count(_.project_app_id)>>filter(_.n>1)

* multiple project ids, but no duplicates by project cycle. 

In [9]:
## merging 

In [10]:
# df = pd.merge(main_details, project_details, how="outer", on=["project_app_id", "project_cycle"], indicator='matches')

In [11]:
# (df>>filter(_.project_app_id =='1-Mendocino Council of Governments-1')>>select(_.project_app_id, 
#                                                                               _.project_cycle,
#                                                                              _.matches,
#                                                                               _.agency_app_num))

In [12]:
#df.info()

### Comparing column names

code help: https://stackoverflow.com/questions/45482755/compare-headers-of-dataframes-in-pandas

In [13]:
#columns in both dfs
main_details.columns.intersection(project_details.columns)

Index(['project_app_id', 'project_cycle', 'awarded'], dtype='object')

In [14]:
#columns in main_details not in project_details
main_details.columns.difference(project_details.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_contact', 'a1_imp_agcy_email',
       'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma', 'a1_imp_agcy_name',
       'a1_imp_agcy_phone', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_letter_of_intent',
       'a1_locode', 'a1_proj_partner_agcy', 'a1_proj_partner_contact',
       'a1_proj_partner_email', 'a1_proj_partner_exists',
       'a1_proj_partner_phone', 'a1_proj_partner_title', 'a2_assem_dist_a',
       'a2_assem_dist_b', 'a2_assem_dist_c', 'a2_congress_dist_a',
       'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_county', 'a2_ct_dist',
       'a2_info_proj_descr', 'a2_info_proj_loc', 'a2_info_proj_name',
       'a2_mop_uza_population', 'a2_mpo', 'a2_output_outcome', 'a2_past_proj',
       'a2_past_proj_qty', 'a2_proj_lat', 'a2_proj_long',
       'a2_proj_scope_summary', 'a2_project_location_map', 'a2_rtpa',
       'a2_senate_dist_a', 'a2_senate_dist_b', 'a2_senatedistc',
       'a3_current

In [15]:
#columns in project_details not in main_details
project_details.columns.difference(main_details.columns)

Index(['a4_act_other_1', 'a4_act_other_1_descr', 'a4_act_other_2',
       'a4_act_other_2_decr', 'a4_after_school', 'a4_bike_classes',
       'a4_bike_gap_pct', 'a4_bike_rodeos', 'a4_bike_train', 'a4_classrooms',
       ...
       'v_other_traffic_calming_imprv_2', 'v_other_traffic_calming_qty_1',
       'v_other_traffic_calming_qty_2', 'v_remove_right_turn_pocket',
       'v_remove_travel_ln', 'v_sig_inter_new_roundabout',
       'v_sig_inter_timing_improv', 'v_speed_feedback_signs',
       'v_un_sig_inter_new_roundabout', 'v_un_sig_inter_new_traf_sig'],
      dtype='object', length=132)

## Reading in w/ utils

In [16]:
import utils

In [17]:
df = utils.read_in_data()

In [18]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20,1,Yes,80,Yes,0,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,0,0,0,1500,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,1500,0,,0,,0,5,0,4,3,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50,0,Yes,50,No,0,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,0,0,0,6336,0,,0,0,0,0,0,0,0,,0,,0,0,2,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,0,0,0,0,,0,,0,38,0,15,16,0,18,3,0,0,,1,6.0,0,18,0,0,Yes,No,No,,0,0,,0,0,8800,0,0,0,0,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50,2,Yes,50,No,0,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,0,0,13752,5748,0,,0,0,0,0,0,0,0,,0,,0,0,0,1,0,,0,0,0,0,,0,,0,0,0,40,2,0,0,0,20,0,0,0,,0,,0,9,0,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,60,0,0,0,Left Turn Arrow,3,Enhanced Crosswalk Unsignalized,3,218,1000,7,0,0,1,0,0,0,,0,0.0,0,7,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,43,0,0,0,Enhance crosswalk (unsignalized),7,Raised Crosswalk,2,189,3455,5,0,0,1,0,0,0,,0,0.0,2,5,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 211 entries, a1_imp_agcy_city to matches
dtypes: category(1), datetime64[ns](2), float64(23), int64(97), object(88)
memory usage: 1.4+ MB


In [20]:
df.matches.value_counts()

both          882
left_only       0
right_only      0
Name: matches, dtype: int64

In [21]:
df.awarded_y.value_counts()

N    882
Name: awarded_y, dtype: int64

In [22]:
### Comparing merged df with cleaned data

In [23]:
columns_to_drop = ['a1_imp_agcy_contact','a1_imp_agcy_email','a1_imp_agcy_phone',
                      'a1_proj_partner_contact', 'a1_proj_partner_email', 'a1_proj_partner_phone']


In [24]:
alldata = to_snakecase(pd.read_excel('gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls', sheet_name='AllData'))

In [25]:
alldata = alldata.drop(columns = columns_to_drop)

In [26]:
alldata.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
58,N,CYCLE 5,3,,,,,Infrastructure - Small,3-Nevada County-1,Combie Road Active Transportation Project,Nevada,5917,Nevada County,"950 Maidu Avenue, Suite 170",Nevada City,95959,Principal Civil Engineer,Yes,491,03-5917F15,No,,,1,,,1,,,1,,,Install Class 1 pathway on north side of Combi...,On Combie Road at various locations between Hi...,Project is located outside one of the ten larg...,Caltrans,No,0,39.04,-121.09,The project will construct a new Class 1 separ...,,Nevada CTC,,Yes,,Yes,,Yes,,No,Yes,50,3,Yes,50,Yes,0,No,0,Yes,Yes,No,No,Note: ATP supersedes previous bicycle and ped ...,"Construction of 3,000' Class 1 pathway, 300' s...",0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,3000,0,0,,0,,0,0,0,100,0,0,0,0,1,0,290,0,,0,,0,1,0,1,2,8,0,0,0,0,,0,0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


In [27]:
cleaned = to_snakecase(pd.read_excel('gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls', sheet_name='AllDataFieldMapping Cleaned'))

In [28]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_contact,a1_imp_agcy_title,a1_imp_agcy_email,a1_imp_agcy_phone,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_contact,a1_proj_partner_title,a1_proj_partner_email,a1_proj_partner_phone,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,...,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,unnamed:_215,prog__amount__paed__1,prog__amount__pse__1,prog__amount__rw__1,prog__amount__con__1,prog__amount__con_ni__1,unnamed:_221,fund_year_1,prog__amount__paed__2,prog__amount__pse__2,prog__amount__rw__2,prog__amount__con__2,prog__amount__con_ni__2,unnamed:_228,fund_year_2,prog__amount__paed__3,prog__amount__pse__3,prog__amount__rw__3,prog__amount__con__3,prog__amount__con_ni__3,unnamed:_235,fund_year_3,prog__amount__paed__4,prog__amount__pse__4,prog__amount__rw__4,prog__amount__con__4,prog__amount__con_ni__4,unnamed:_242,fund_year_4,unnamed:_244,unnamed:_245,unnamed:_246,unnamed:_247,unnamed:_248,unnamed:_249,unnamed:_250,unnamed:_251,unnamed:_252
385,Y,5,7,,,,,Infrastructure - Medium,"7-Los Angeles, City of-5",SRTS Panorama City Elementary School Project,LA,5006,"Los Angeles, City of","100 S. Main Street, 9th Floor",Los Angeles,90012,Margot Ocañas,Safe Routes to School Director,Margot.Ocanas@lacity.org,213-928-9707,Yes,07-00152S,07-5006F15,No,,,,,,46,46,,,29,29,,,18,18,,,Transform one of the City’s most traffic-stres...,The one-quarter mile radius around the Panoram...,Project is located within one of the ten large...,SCAG,Yes,1,34.23,-118.46,"Panorama City Elementary School, in the heart ...",,,,No,,Yes,,No,,Yes,Yes,58,1,Yes,42,Yes,0,,0,Yes,No,0,0,"Mobility Plan 2035, Vision Zero LA, Green New ...","23 curb extensions, 6 median extensions, 2 tra...",1,584,3700,12172,0,,0,0,0,0,0,0,0,Bicycle Loop Detector,1,Bus Bulbs with Class I Bicycle Lanes,7,0,0,0,0,,0,0,0,0,,0,,0,0,0,30,0,0,...,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [29]:
#filter_col = [col for col in cleaned if col.startswith('unnamed')]

In [30]:
#remove columns that have some agency specific information
cleaned= cleaned.drop(columns = columns_to_drop)

In [31]:
#remove columns that were manually entered - last 34 columns
cleaned = cleaned.drop(columns = (cleaned.iloc[:, 199:]))

In [32]:
#remove columns that are blank and unnamed
#cleaned=cleaned.drop(columns=filter_col)

In [33]:
#cleaned.columns.get_loc("original_prog__amt___pa_ed_")

In [34]:
#(cleaned.iloc[:, 199:].columns.tolist())

In [35]:
#making sure they are null
#(cleaned.iloc[1:, 199:]).info()

In [36]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
320,Y,5,4,,,,,Infrastructure - Large,"4-Oakland, City of-2",East Oakland Neighborhood Bike Routes,ALA,5012,"Oakland, City of",250 Frank H Ogawa Plaza,Oakland,94612,Funding Program Manager,Yes,00099S,04-5012R,No,,,18,18,,,13,13,,,9,9,,,Neighborhood bike routes on four corridors in ...,"Neighborhood bike routes on 81st Avenue, 85th ...",Project is located within one of the ten large...,MTC,No,0,37.75,-122.18,East Oakland's street network currently presen...,,,,No,,Yes,,Yes,,No,Yes,90,0,Yes,10,No,0,,0,Yes,No,No,No,,Construction of four Class III bicycle bouleva...,0,0,225,30780,0,,0,0,0,0,0,0,0,Two Stage Left Turn Box,4,Green Backed Sharrow,30,0,5,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,269,0,0,0,Pedestrian Hybrid Beacon,2,,0,37,0,49,157,4,4,0,0,0,,0,6,2,18,0,0,No,No,No,Speed bumps,0,81,,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


### How do the merged and cleaned up columns match up?

In [37]:
df.columns.intersection(cleaned.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma',
       'a1_imp_agcy_name', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_proj_partner_agcy',
       'a1_proj_partner_exists',
       ...
       'a4_collab_non_profit', 'a4_collab_schools', 'a4_collab_pub_works',
       'a4_collab_other', 'a4_colab_other_descr', 'a4_plan_ped',
       'a4_plan_bike', 'a4_plan_atp', 'a4_plan_school_routes',
       'a4_row_open_street_demo'],
      dtype='object', length=191)

In [38]:
df.columns.difference(cleaned.columns)

Index(['a1_letter_of_intent', 'agency_app_num', 'app_fk', 'app_pk',
       'attch_addtl_attachments', 'attch_app_sig_page',
       'attch_conditions_photos', 'attch_conditions_project_map',
       'attch_engineeers_checklist', 'attch_exhibit22_plan',
       'attch_letters_of_support', 'attch_link', 'attch_ni_workplan',
       'attch_project_estimate', 'awarded_x', 'awarded_y',
       'completed_pdf_form', 'details_datetime_stamp', 'main_datetime_stamp',
       'matches'],
      dtype='object')

In [39]:
cleaned.columns.difference(df.columns)

Index(['#', 'assembly_district', 'atp_id', 'awarded', 'congressional_district',
       'ppno', 'ppno_1', 'senate_district'],
      dtype='object')

### Null columns

In [40]:
# finding columns with all null and dropping for now. will keep in script

In [41]:
alldatanull = alldata.columns[alldata.isna().all()].tolist()

In [42]:
alldata = alldata.drop(columns = alldatanull)

In [43]:
dfnull = df.columns[df.isna().all()].tolist()
df = df.drop(columns = dfnull)

In [44]:
cleanednull = cleaned.columns[cleaned.isna().all()].tolist()
cleaned = cleaned.drop(columns = cleanednull)

## Column Cleaning

In [45]:
df.a2_mpo.value_counts()

SCAG        315
MTC         132
SANDAG       62
Caltrans     52
SACOG        52
SJCOG        38
TCAG         37
AMBAG        34
KCOG         31
COFCG        31
SBCAG        19
SLOCOG       15
BCAG         14
TMPO         13
SRTA         11
StanCOG      11
MCTC          7
KCAG          3
MCAG          3
CVAG          2
Name: a2_mpo, dtype: int64

In [46]:
df.details_datetime_stamp.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: details_datetime_stamp
Non-Null Count  Dtype         
--------------  -----         
882 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 13.8 KB


In [47]:
compare_col = np.where(df["main_datetime_stamp"] == df["details_datetime_stamp"], True, False)
df["compare_datetime"] = compare_col
df.compare_datetime.value_counts()

True     763
False    119
Name: compare_datetime, dtype: int64

In [48]:
#some datetimes are seconds different others are larger
df>>filter(_.compare_datetime==False)>>select(_.details_datetime_stamp, _.main_datetime_stamp)

Unnamed: 0,details_datetime_stamp,main_datetime_stamp
27,2020-09-10 16:23:42,2020-09-10 16:23:41
37,2020-09-11 12:48:11,2020-09-11 12:48:10
55,2020-09-10 14:14:03,2020-09-10 14:14:02
67,2020-09-14 19:20:56,2020-09-14 19:20:55
75,2020-09-11 16:29:11,2020-09-11 16:29:10
...,...,...
869,2022-06-16 12:10:18,2022-06-21 11:28:23
871,2022-06-16 12:12:24,2022-06-16 12:12:23
872,2022-06-16 12:15:24,2022-06-16 12:15:23
874,2022-06-16 10:57:37,2022-06-16 10:57:36


In [49]:
df= df.drop(columns='compare_datetime')

In [50]:
df.sample(2)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches
484,Santa Cruz,05-5936F15,Yes,Santa Cruz County,000510,"701 Ocean Street, Room 410",Project Manager,95060,ATP Cycle 6 - Rail Trail 10+11_Partnering Lett...,City of Capitola,Yes,Director of Public Works,29,,,19,,,Santa Cruz,5,Construction of 4.15 miles of Segments 10 and ...,Located along the rail line from 17th Avenue t...,Coastal Rail Trail Segments 10 and 11,Project is located outside one of the large MP...,AMBAG,Yes,3,,,The completed Monterey Bay Sanctuary Scenic Tr...,SCCRTC,17,,,Yes,No,No,Yes,Infrastructure + NI - Large,Yes,50,10,Yes,50,Yes,0,Yes,100,Yes,1,3251,Attachment-K-Additional_Seg 10+11.pdf,ATP Cycle 6_Attachment-A-Signature-Page_Seg 10...,Attachment E_ATP Cycle 6_Rail Trail 10+11.pdf,Attachment-D-Project Plans_Rail Trail 10+11.pdf,ATP_Cycle_6_Attachment-B-Engr-Checklist_Seg_10...,Attachment I_LOS_Seg. 10.11_ATP6Grant_COMBINED...,Attachment-G-Exhibit-25-R-NI-Work-Plan Seg 101...,Attachment-F-Project-Estimate_CRT10+11.xlsx,2022-06-11 15:45:16,5-Santa Cruz County-1,CYCLE 6,N,5936,0,0,"RTP, Monterey Bay Sanctuary Scenic Trail Maste...",Construction of 4.15 miles of Segments 10 and ...,Yes,0,0,0,0,0,5,3,1,0,0,5,0,Green Crossbike striping,6,,0,0,0,0,0,0,0,21856,0,Viaducts,5,Interpretive Signage,20,0,3,100,0,0,0,0,13,0,400,0,Access ramps/stairs,3,,0,1,0,0,0,0,0,0,10,0,,0,,0,1,3,0,No,Yes,Yes,Curb bulb-outs,0,4,,0,0,0,0,0,0,0,3251,2022-06-11 15:45:16,N,0,Y,44,Y,56,N,0,,N,,0,0,0,16,0,0,,40,88,40,0,44,44,0,0,8,80,,0,4,School-wide active transportation festivals,0,,Y,N,Y,Y,Y,N,,English and Spanish,Y,N,N,Y,Y,N,,N,N,N,N,No,N,both
691,Benicia,FO21,Yes,"Benicia, City of",04-5003F15,250 East L Street,Deputy Public Works Director/City Engineer,94510,,,No,,14,,,5,,,Solano,4,"RRFB with LED crosswalk illuminators, high vis...",Military West and Drolette; Drolette and Corri...,ATP Cycle 6 Safe Routes to School Improvements...,Project is located outside one of the large MP...,Caltrans,Yes,2,,,The proposed project will enhance pedestrian a...,,3,,,Yes,No,Yes,No,Infrastructure - Small,Yes,20,3,Yes,80,Yes,0,,0,No,1,3756,,Signature Page.pdf,Photos.pdf,ATP SR2S Conceptual Plan - Final 6-15-22.pdf,Attachment-B-Engr-Checklist.pdf,Letters of support_all.pdf,,Attachment-F-Project-Estimate Project Limits -...,2022-06-15 18:04:03,"4-Benicia, City of-1",CYCLE 6,N,5003,0,0,,"Install RRFBs with LED crosswalk illuminators,...",Yes,1,0,0,0,0,0,6,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,,0,,0,0,0,3,0,0,0,0,1,0,300,0,,0,,0,20,0,6,15,0,2,0,0,0,,0,,6,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,3756,2022-06-15 18:04:03,N,0,N,0,N,0,N,0,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both


In [51]:
df.columns.tolist()

['a1_imp_agcy_city',
 'a1_imp_agcy_fed_ma_num',
 'a1_imp_agcy_ma',
 'a1_imp_agcy_name',
 'a1_imp_agcy_state_ma_num',
 'a1_imp_agcy_street',
 'a1_imp_agcy_title',
 'a1_imp_agcy_zip',
 'a1_letter_of_intent',
 'a1_proj_partner_agcy',
 'a1_proj_partner_exists',
 'a1_proj_partner_title',
 'a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_county',
 'a2_ct_dist',
 'a2_info_proj_descr',
 'a2_info_proj_loc',
 'a2_info_proj_name',
 'a2_mop_uza_population',
 'a2_mpo',
 'a2_past_proj',
 'a2_past_proj_qty',
 'a2_proj_lat',
 'a2_proj_long',
 'a2_proj_scope_summary',
 'a2_rtpa',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_plan_active_trans_exists',
 'a3_plan_bicycle_exists',
 'a3_plan_ped_exists',
 'a3_plan_srts_exists',
 'a3_proj_type',
 'a3_st_bicycle_applies',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_applies',
 'a3_st_ped_pct',
 'a3_st_srts',
 'a3_trail_elig_cost',
 'a3_trail_fed

#### Columns to add/change:
* geometry column for lat long
* a2_senate_dist_b to int64
* a2_senatedistc to int64
* a1_locode to int64


In [52]:
from shared_utils import geography_utils
from dla_utils import _dla_utils

In [85]:
gdf = (geography_utils.create_point_geometry(df, longitude_col = 'a2_proj_long', latitude_col = 'a2_proj_lat'))

In [86]:
gdf.head(1)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,geometry
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,12,,,No,Yes,Yes,No,Infrastructure - Small,Yes,20,1,Yes,80,Yes,0,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,0,0,0,1500,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,1500,0,,0,,0,5,0,4,3,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,POINT (120.31282 37.29159)


In [87]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x  

In [110]:
columns_to_int = ['a1_locode', 'a2_senatedistc', 'a2_senate_dist_b', 'a2_assem_dist_b','a2_assem_dist_c','a2_congress_dist_b','a2_congress_dist_c','a2_proj_lat','a2_proj_long',
                  'a2_senate_dist_b','a2_senatedistc','p_un_sig_inter_new_roundabout','a4_emp_based_pct','a4_le_methods','a4_srts_le','a1_locode','a2_senatedistc','a2_senate_dist_b']

In [111]:
#gdf[columns_to_int] = gdf[columns_to_int].apply(get_num)

In [112]:
for col in columns_to_int:
    gdf[col] = gdf[col].apply(get_num)

In [114]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 201 entries, a1_imp_agcy_city to geometry
dtypes: category(1), datetime64[ns](2), float64(12), geometry(1), int64(97), object(88)
memory usage: 1.4+ MB


In [115]:
gdf.select_dtypes("int64")

Unnamed: 0,a1_imp_agcy_zip,a2_assem_dist_a,a2_congress_dist_a,a2_ct_dist,a2_past_proj_qty,a2_senate_dist_a,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_pct,a3_trail_elig_cost,a3_trail_trans_pct,agency_app_num,app_pk,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_qty_1,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_improv_qty_1,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_qty_1,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,a4_reg_init_pct,a4_com_init_pct,a4_safe_route_pct,a4_fl_mile_pct,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_training,a4_act_other_1,a4_act_other_2
0,95340,21,16,10,0,12,20,1,80,0,0,1,1802,0,0,0,1500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,1500,0,0,0,5,0,4,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1802,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,92702,69,46,12,2,34,50,0,50,0,0,4,1811,0,0,0,6336,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,38,0,15,16,0,18,3,0,0,1,0,18,0,0,0,0,0,0,8800,0,0,0,0,1811,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,94044,22,14,4,0,13,50,2,50,0,0,1,1804,0,0,13752,5748,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,40,2,0,0,0,20,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1804,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,92702,69,46,12,2,34,0,5,100,0,0,13,1822,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,0,0,0,0,60,0,0,0,3,3,218,1000,7,0,0,1,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,1822,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,92702,69,46,12,4,34,0,5,100,0,0,14,1823,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,0,0,0,0,43,0,0,0,7,2,189,3455,5,0,0,1,0,0,0,0,2,5,0,0,0,0,0,0,0,0,0,0,0,1823,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,91362,0,0,7,0,0,95,0,5,0,0,1,3192,0,0,0,0,24820,0,0,0,0,0,0,0,0,0,324,2,2,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,265,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
878,91733,49,32,7,5,22,75,4,25,0,0,1,3859,0,0,5100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,23000,4,0,16,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,3859,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
879,95113,25,19,4,1,15,60,0,40,0,0,3,3860,9,0,0,0,6840,0,0,0,0,0,0,0,8,6,0,0,0,4,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,60,0,48,6840,8,0,0,2,0,0,0,0,0,4,4,6840,0,0,0,0,0,0,0,0,0,3860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
880,93101,0,0,5,2,0,25,1,75,0,0,2,3845,2,0,1700,22410,0,0,0,0,0,0,0,0,10,24,0,0,0,0,0,0,0,0,0,0,0,0,20,1,0,0,1,110,0,2820,0,0,0,23,0,0,0,0,0,0,0,0,0,0,42,17,2700,0,0,0,0,0,0,0,0,0,3845,50,0,50,0,0,2,4,4,0,0,0,0,0,4,2,0,0,0,0,0,0,0,0


In [116]:
gdf.select_dtypes("object")

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_proj_scope_summary,a2_rtpa,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_ped_applies,a3_st_srts,a3_trail_fed_funding,a3_trails,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_other_bike_improv_1,b_other_bike_improv_2,m_other_trail_imprv_1,m_other_trail_imprv_2,p_other_ped_imprv_1,p_other_ped_imprv_2,p_amenities_shade_tree_type,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_other_traffic_calming_imprv_2,a4_reg_init,a4_com_init,a4_safe_route,a4_fl_mile,a4_other_ni,a4_other_ni_descr,a4_act_other_1_descr,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,,,No,,Merced,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,The Planada Sidewalk Infill Project is located...,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,Yes,,No,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,,,,,No,No,Yes,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,This project will implement a Class 3 bicycle ...,,Yes,Yes,No,Yes,Infrastructure - Medium,Yes,Yes,No,,No,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,,,No,,San Mateo,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,The project will install a combination of Clas...,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,No,,No,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,Left Turn Arrow,Enhanced Crosswalk Unsignalized,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,Enhance crosswalk (unsignalized),Raised Crosswalk,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Thousand Oaks,07-5292F15,Yes,"Thousand Oaks, City of",00252,2100 Thousand Oaks Blvd.,Transportation Planner,,,No,,Ventura,"Construction funding for Class IV bikelanes, ...","In the City of Thousand Oaks, Lynn Road betwee...",Lynn Road Bike Lanes and Pedestrain Improvements,Project is located within one of the ten large...,SCAG,No,The project is located on 4.5-miles of Lynn R...,,Yes,No,No,No,Infrastructure - Small,Yes,Yes,No,,No,ATTACHMENT K.pdf,Attachement A Signed.pdf,photoskn.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,"7-Thousand Oaks, City of-1",CYCLE 6,N,5392,No,Yes,Local Road Safety Plan,"265' new sidewalk, 2 rapid flashing beacons, 1...",Yes,,,,,,,,No,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
878,South El Monte,,Yes,"South El Monte, City of",07-5352S21,1415 Santa Anita Avenue,Community Development Director,Partner Agency Letter of Intent.pdf,City of El Monte,Yes,City Engineer,Los Angeles,Construct Class II bike lane segments; install...,Merced Avenue from Garvey Avenue to Fern Stree...,Merced Avenue Greenway,Project is located within one of the ten large...,SCAG,Yes,The project will implement bicyclist/pedestria...,,No,Yes,No,No,Infrastructure - Small,Yes,Yes,No,,No,Att K - Support Docs.pdf,Att A - Signature Page.pdf,Att E - Photos of Existing Conditions.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,"7-South El Monte, City of-1",CYCLE 6,N,5352,0,0,,Construct 0.97-mile Class II bike path; 4 enha...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
879,San Jose,04-5005F15,Yes,"San Jose, City of",00200S,200 E Santa Clara St,Senior Transportation Specialist,,,No,,Santa Clara,This project will decouple 2nd and 3rd street ...,The project is in SoFA arts district in southw...,2nd & 3rd Street De-Coupling and Complete Stre...,Project is located within one of the ten large...,MTC,Yes,"The City of San José, through its Downtown Tra...",,No,Yes,No,No,Infrastructure - Large,Yes,Yes,No,,No,attachment k.pdf,Attachment-A-Signature-Page (1)_jr (1).pdf,Attachment_G_Site_Photos.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,"4-San Jose, City of-3",CYCLE 6,N,5005,0,0,"Emerging mobility Action Plan, Carbon Neutral ...",Project constructs approximately 6840 feet of ...,Yes,Bike Ramps,Raised Intersections,,,Fully Bulbed (all 4 corners),,,Yes,No,No,Conversion of 1 to 2 way operation,<---- 0.68 miles,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N
880,Santa Barbara,05-5951R,Yes,Santa Barbara County,00100S,123 E. Anapuma St,Alternative Transportation Manager,,,No,,Santa Barbara,"Curb extensions, sidewalks and crosswalks for ...",Unincorporated neighborhood located south of E...,Isla Vista Bike and Pedestrian Improvements Pr...,Project is located outside one of the large MP...,SBCAG,Yes,"Isla Vista is a place like no other. 15,733 pe...",,Yes,No,No,No,Infrastructure + NI - Medium,Yes,Yes,Yes,,No,,Attachment A_Signature Page - 2022.pdf,Existing Conditions Photos.pdf,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5-Santa Barbara County-2,CYCLE 6,N,5951,0,0,Regional Transportation Plan,"Curb extensions, sidewalks, and bicycle networ...",Yes,Bike left-hand turn lanes,Class 2 conflict / intersection striping,,,,,,No,No,Yes,,,Y,N,Y,N,N,,,,N,N,Y,Y,N,N,,"Spanish, Mandarin",Y,N,Y,N,N,N,,N,N,N,N,No,N


In [117]:
gdf.select_dtypes("float64")

Unnamed: 0,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_b,a2_congress_dist_c,a2_proj_lat,a2_proj_long,a2_senate_dist_b,a2_senatedistc,p_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_le_methods,a4_srts_le
0,,,,,37.00,120.00,,,0.00,0.00,0.00,0.00
1,,,,,33.00,117.00,,,6.00,0.00,0.00,0.00
2,,,,,37.00,-122.00,,,0.00,0.00,0.00,0.00
3,,,,,33.00,117.00,,,0.00,0.00,0.00,0.00
4,,,,,33.00,117.00,,,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...
877,4.00,4.00,2.00,6.00,,,2.00,7.00,,,,
878,,,,,,,,,,,,
879,,,,,,,,,,,,
880,3.00,7.00,2.00,4.00,,,1.00,9.00,,,,


#### Columns to add
* a2_county acronym
*

In [118]:
df>>select(_.a2_county)

Unnamed: 0,a2_county
0,Merced
1,Orange
2,San Mateo
3,Orange
4,Orange
...,...
877,Ventura
878,Los Angeles
879,Santa Clara
880,Santa Barbara
