# ATP Data Exploration

In [1]:
import numpy as np
import pandas as pd

from calitp_data_analysis.sql import to_snakecase

from dla_utils import _dla_utils

from siuba import *



In [2]:
pd.set_option("display.max_columns", 220)

In [3]:
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/atp/'


## Reading in w/o utils

In [4]:
main_details = to_snakecase(
    pd.read_excel(f"{GCS_FILE_PATH}Main Details.xls")
)
project_details = to_snakecase(
    pd.read_excel(
        f"{GCS_FILE_PATH}Project Details.xls"
    )
)

In [5]:
# main_details.info()

In [6]:
# project_details.project_cycle.value_counts()

In [7]:
# project_details>>count(_.project_app_id)>>filter(_.n>1)

In [8]:
# project_details>>group_by(_.project_cycle)>>count(_.project_app_id)>>filter(_.n>1)

* multiple project ids, but no duplicates by project cycle. 

In [9]:
## merging

In [10]:
# df = pd.merge(main_details, project_details, how="outer", on=["project_app_id", "project_cycle"], indicator='matches')

In [11]:
# (df>>filter(_.project_app_id =='1-Mendocino Council of Governments-1')>>select(_.project_app_id,
#                                                                               _.project_cycle,
#                                                                              _.matches,
#                                                                               _.agency_app_num))

In [12]:
# df.info()

### Comparing column names

code help: https://stackoverflow.com/questions/45482755/compare-headers-of-dataframes-in-pandas

In [13]:
# columns in both dfs
main_details.columns.intersection(project_details.columns)

Index(['project_app_id', 'project_cycle', 'awarded'], dtype='object')

In [14]:
# columns in main_details not in project_details
main_details.columns.difference(project_details.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_contact', 'a1_imp_agcy_email',
       'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma', 'a1_imp_agcy_name',
       'a1_imp_agcy_phone', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_letter_of_intent',
       'a1_locode', 'a1_proj_partner_agcy', 'a1_proj_partner_contact',
       'a1_proj_partner_email', 'a1_proj_partner_exists',
       'a1_proj_partner_phone', 'a1_proj_partner_title', 'a2_assem_dist_a',
       'a2_assem_dist_b', 'a2_assem_dist_c', 'a2_congress_dist_a',
       'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_county', 'a2_ct_dist',
       'a2_info_proj_descr', 'a2_info_proj_loc', 'a2_info_proj_name',
       'a2_mop_uza_population', 'a2_mpo', 'a2_output_outcome', 'a2_past_proj',
       'a2_past_proj_qty', 'a2_proj_lat', 'a2_proj_long',
       'a2_proj_scope_summary', 'a2_project_location_map', 'a2_rtpa',
       'a2_senate_dist_a', 'a2_senate_dist_b', 'a2_senatedistc',
       'a3_current

In [15]:
# columns in project_details not in main_details
project_details.columns.difference(main_details.columns)

Index(['a4_act_other_1', 'a4_act_other_1_descr', 'a4_act_other_2',
       'a4_act_other_2_decr', 'a4_after_school', 'a4_bike_classes',
       'a4_bike_gap_pct', 'a4_bike_rodeos', 'a4_bike_train', 'a4_classrooms',
       ...
       'v_other_traffic_calming_imprv_2', 'v_other_traffic_calming_qty_1',
       'v_other_traffic_calming_qty_2', 'v_remove_right_turn_pocket',
       'v_remove_travel_ln', 'v_sig_inter_new_roundabout',
       'v_sig_inter_timing_improv', 'v_speed_feedback_signs',
       'v_un_sig_inter_new_roundabout', 'v_un_sig_inter_new_traf_sig'],
      dtype='object', length=132)

## Reading in w/ utils

In [16]:
import utils

In [17]:
df = utils.read_in_data()

In [18]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20,1,Yes,80,Yes,0,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,0,0,0,1500,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,1500,0,,0,,0,5,0,4,3,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50,0,Yes,50,No,0,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,0,0,0,6336,0,,0,0,0,0,0,0,0,,0,,0,0,2,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,0,0,0,0,,0,,0,38,0,15,16,0,18,3,0,0,,1,6.0,0,18,0,0,Yes,No,No,,0,0,,0,0,8800,0,0,0,0,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50,2,Yes,50,No,0,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,0,0,13752,5748,0,,0,0,0,0,0,0,0,,0,,0,0,0,1,0,,0,0,0,0,,0,,0,0,0,40,2,0,0,0,20,0,0,0,,0,,0,9,0,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,60,0,0,0,Left Turn Arrow,3,Enhanced Crosswalk Unsignalized,3,218,1000,7,0,0,1,0,0,0,,0,0.0,0,7,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,43,0,0,0,Enhance crosswalk (unsignalized),7,Raised Crosswalk,2,189,3455,5,0,0,1,0,0,0,,0,0.0,2,5,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 211 entries, a1_imp_agcy_city to matches
dtypes: category(1), datetime64[ns](2), float64(23), int64(97), object(88)
memory usage: 1.4+ MB


In [20]:
df.matches.value_counts()

both          882
left_only       0
right_only      0
Name: matches, dtype: int64

In [21]:
df.awarded_y.value_counts()

N    882
Name: awarded_y, dtype: int64

### Comparing merged df with cleaned data

In [22]:
#removing columns with agency staff information from cleaned df
columns_to_drop = [
    "a1_imp_agcy_contact",
    "a1_imp_agcy_email",
    "a1_imp_agcy_phone",
    "a1_proj_partner_contact",
    "a1_proj_partner_email",
    "a1_proj_partner_phone",
]

In [23]:
alldata = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllData",
    )
)

In [24]:
alldata = alldata.drop(columns=columns_to_drop)

In [25]:
alldata.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
381,N,CYCLE 5,1,,,,,Infrastructure - Small,1-Lake County-1,Konocti Road Safe Routes To School Project,Lake,5914,Lake County,255 N. Forbes St.,Lakeport,95453,Associate Civil Engineer,Yes,00172S,01-5914R,No,,,4,,,5,,,2,,,"Construct new sidewalks, ADA Compliant curb ra...",Along the south side of Konocti Road between t...,Project is located outside one of the ten larg...,Caltrans,Yes,1,38.97,-122.82,Currently there is a gap in sidewalks along th...,,Lake CCAPC,,Yes,,Yes,,No,,Yes,Yes,10,2,Yes,90,Yes,0,,0,Yes,No,0,0,Lake Co. Pedestrian Facility Needs Study (Lake...,Construction of these new sidewalks will provi...,0,0,50,0,0,,0,0,0,0,0,0,0,Pavement markings,10,,0,0,0,0,4,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,8,2,830,0,,0,,0,2,80,0,0,0,0,0,0,0,,0,0,0,0,4,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


In [26]:
cleaned = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllDataFieldMapping Cleaned",
    )
)

In [27]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_contact,a1_imp_agcy_title,a1_imp_agcy_email,a1_imp_agcy_phone,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_contact,a1_proj_partner_title,a1_proj_partner_email,a1_proj_partner_phone,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,...,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,unnamed:_215,prog__amount__paed__1,prog__amount__pse__1,prog__amount__rw__1,prog__amount__con__1,prog__amount__con_ni__1,unnamed:_221,fund_year_1,prog__amount__paed__2,prog__amount__pse__2,prog__amount__rw__2,prog__amount__con__2,prog__amount__con_ni__2,unnamed:_228,fund_year_2,prog__amount__paed__3,prog__amount__pse__3,prog__amount__rw__3,prog__amount__con__3,prog__amount__con_ni__3,unnamed:_235,fund_year_3,prog__amount__paed__4,prog__amount__pse__4,prog__amount__rw__4,prog__amount__con__4,prog__amount__con_ni__4,unnamed:_242,fund_year_4,unnamed:_244,unnamed:_245,unnamed:_246,unnamed:_247,unnamed:_248,unnamed:_249,unnamed:_250,unnamed:_251,unnamed:_252
204,N,5,7,,,,,Plan,"7-Hawaiian Gardens, City of-1",Bike Master Plan Preparation and General Plan ...,LA,,"Hawaiian Gardens, City of",21815 Pioneer Boulevard,Hawaiian Gardens,90716,Joe Colombo,Community Development Director,jcolombo@hgcity.org,562-420-2641,Yes,,07-5387F15,No,,,,,,63,63,,,38,38,,,32,32,,,The project prepares a Citywide Bicycle Master...,Citywide,Project is located within one of the ten large...,SCAG,Yes,2,33.83,-118.07,The City of Hawaiian Gardens does not have an ...,,,,No,,No,,No,,No,Yes,100,3,No,0,No,0,,0,No,No,0,0,,Prepare and present the draft Bicycle Master P...,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,...,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,Y,N,N,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [28]:
# filter_col = [col for col in cleaned if col.startswith('unnamed')]

In [29]:
# remove columns that have some agency specific information
cleaned = cleaned.drop(columns=columns_to_drop)

In [30]:
# remove columns that were manually entered - last 34 columns
cleaned = cleaned.drop(columns=(cleaned.iloc[:, 199:]))

In [31]:
# remove columns that are blank and unnamed
# cleaned=cleaned.drop(columns=filter_col)

In [32]:
# cleaned.columns.get_loc("original_prog__amt___pa_ed_")

In [33]:
# (cleaned.iloc[:, 199:].columns.tolist())

In [34]:
# making sure they are null
# (cleaned.iloc[1:, 199:]).info()

In [35]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
108,N,5,7,,,,,Infrastructure - Large,"7-Los Angeles, City of-6",Hollywood Walk of Fame Safety and Connectivity...,LA,5006,"Los Angeles, City of",1149 South Broadway #800,Los Angeles,90015,Project Manager,Yes,00152S,07-5006F15,No,,,50,50,,,28,28,,,26,26,,,"Protected bicycle lanes, extended sidewalks, a...",Hollywood Boulevard between Gower and Wilcox i...,Project is located within one of the ten large...,SCAG,Yes,2,34.1,-118.33,Hollywood Boulevard between Gower and Wilcox a...,,,,No,,No,,No,,No,Yes,50,0,Yes,50,No,0,,0,Yes,No,No,Yes,Mobility Element of General Plan (Mobility 2035),"Building extended sidewalks with shade trees, ...",0,0,0,0,5200,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,5200,0,0,Bollard protected corners,24,Raised Intersections incl. rsd crssngs,6,0,0,26,0,48,26,12,0,101,Climate appropriate canopy,0,0,0,2,2,3950,Yes,No,No,Raised Intersections,0,6,Raised Crossings,2,11,3950,0,104,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


### How do the merged and cleaned up columns match up?

In [36]:
df.columns.intersection(cleaned.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma',
       'a1_imp_agcy_name', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_proj_partner_agcy',
       'a1_proj_partner_exists',
       ...
       'a4_collab_non_profit', 'a4_collab_schools', 'a4_collab_pub_works',
       'a4_collab_other', 'a4_colab_other_descr', 'a4_plan_ped',
       'a4_plan_bike', 'a4_plan_atp', 'a4_plan_school_routes',
       'a4_row_open_street_demo'],
      dtype='object', length=191)

In [37]:
# columns in df not in cleaned
df.columns.difference(cleaned.columns)

Index(['a1_letter_of_intent', 'agency_app_num', 'app_fk', 'app_pk',
       'attch_addtl_attachments', 'attch_app_sig_page',
       'attch_conditions_photos', 'attch_conditions_project_map',
       'attch_engineeers_checklist', 'attch_exhibit22_plan',
       'attch_letters_of_support', 'attch_link', 'attch_ni_workplan',
       'attch_project_estimate', 'awarded_x', 'awarded_y',
       'completed_pdf_form', 'details_datetime_stamp', 'main_datetime_stamp',
       'matches'],
      dtype='object')

In [38]:
# columns in cleaned not in df
cleaned.columns.difference(df.columns)

Index(['#', 'assembly_district', 'atp_id', 'awarded', 'congressional_district',
       'ppno', 'ppno_1', 'senate_district'],
      dtype='object')

In [39]:
len(cleaned)

454

## Assembly, Congressional and Senate Districts

In [40]:
ad = (
    cleaned
    >> select(
        _.a1_imp_agcy_name,
        _.assembly_district,
        _.a2_assem_dist_a,
        _.a2_assem_dist_b,
        _.a2_assem_dist_c,
        # _.congressional_district,
        # _.a2_congress_dist_a,
        # _.a2_congress_dist_b,
        # _.a2_congress_dist_c,
        # _.senate_district,
        # _.a2_senate_dist_a,
        # _.a2_senate_dist_b,
        # _.a2_senatedistc,
    )
)

In [41]:
ad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   a1_imp_agcy_name   454 non-null    object 
 1   assembly_district  454 non-null    object 
 2   a2_assem_dist_a    454 non-null    int64  
 3   a2_assem_dist_b    62 non-null     float64
 4   a2_assem_dist_c    29 non-null     float64
dtypes: float64(2), int64(1), object(2)
memory usage: 17.9+ KB


### Unsuccessful Methods

In [42]:
## Need to join two columns together if they have values

In [43]:
## code help: https://stackoverflow.com/questions/52889130/how-to-remove-zeros-after-decimal-from-string-remove-all-zero-after-dot
#ad["a2_assem_dist_b"].map("{0:g}".format)

In [44]:
##code help:
# https://stackoverflow.com/questions/49091259/pandas-looping-through-rows-and-skipping-over-rows
# https://stackoverflow.com/questions/36774602/concatenate-two-numerical-values-to-make-a-new-column-using-pandas

In [45]:
# ## Code help: https://stackoverflow.com/questions/56119307/pandas-conditionally-concat-two-columns
# mask = (ad["a2_assem_dist_b"] < 10).fillna(False)

# ad["assem_dist_combined2"] = ad.loc[mask, "a2_assem_dist_b"].map("{0:g}".format) + ad[
#     "a2_assem_dist_c"
# ].map("{0:g}".format)

In [46]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         ad["a2_assem_dist_b"], ad["assem_dist_combined2"])

In [47]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         (ad["a2_assem_dist_b"].map("{0:g}".format) + ', ' + ad["a2_assem_dist_c"].map("{0:g}".format)),
#          ad["assem_dist_combined2"])

In [48]:
## another attempt:

## this will combine all 
## code help: https://stackoverflow.com/questions/55526620/how-to-combine-non-null-entries-of-columns-of-a-dataframe-into-a-new-column
#df["assem_dist_combined3"] = df.agg(lambda x: x.dropna().str.cat(sep=','), axis=1)

## this combines set columns
## code help: https://stackoverflow.com/questions/45787782/combine-multiple-columns-in-pandas-excluding-nans
# cols = ['a2_assem_dist_b', 'a2_assem_dist_c']
# ad["assem_dist_combined3"] = ad[cols].agg(lambda x: x.dropna().tolist(), axis=1)


### Function

Requirements for function:
* when `a2_assem_dist_a` == 0 AND `a2_assem_dist_b` & `assem_dist_c` are less than 10, **then combine of `2_assem_dist_b` & `assem_dist_c` to one number.**
* when `a2_assem_dist_a` is less than 10 AND `a2_assem_dist_b` is less than 10 AND `assem_dist_c` is null, **then take combine `2_assem_dist_a` & `assem_dist_b` (can be one number or two)**
* when `a2_assem_dist_a` == 1 AND `a2_assem_dist_b` is less than 10, **then combine `a2_assem_dist_a` & `2_assem_dist_b` with a comma**
* when `a2_assem_dist_a` is notnull AND `a2_assem_dist_b` & `assem_dist_c` are null,** then `ssembly_district`== `2_assem_dist_a`**
* when `a2_assem_dist_a` & `a2_assem_dist_b` is >= 10 AND `assem_dist_c` is null, **then take combination of `2_assem_dist_a` & `assem_dist_b` with a comma**
* when `a2_assem_dist_a` & `a2_assem_dist_c` is >= 10 AND `assem_dist_b` is null, **then take combination of `2_assem_dist_a` & `assem_dist_c` with a comma**


In [49]:
def format_districts(df, col_a, col_b, col_c, new_col):
    
    #rename columns to alias
    df = df.rename(columns = {col_a:'a',
                              col_b:'b',
                              col_c:'c'})
    #fix types
    df = df.astype({'a':'Int64',
                    'b':'Int64',
                    'c':'Int64'})
    
    #replace null values with numeric
    df["a"].fillna(9999999, inplace=True)
    df["b"].fillna(9999999, inplace=True)
    df["c"].fillna(9999999, inplace=True)
    
    def district_status(row):
        if (row.a == 0) and (row.b < 10) and (row.c < 10):
            return (str(row["b"])) + (str(row["c"]))
        
        elif (row.a < 10) and (row.b < 10) and not (row.c == 9999999):
            return (str(row["a"])) + (str(row["b"]))
        
        elif (row.a>=1) and (row.b == 9999999) and (row.c == 9999999):
            return (row["a"])
        
        elif (row.a >= 10) and (row.b>= 10) and (row.c == 9999999):
            return (str(row["a"])) + ', ' + (str(row["b"]))
        
        elif (row.a >= 10) and (row.b == 9999999)  and (row.c >= 10):
            return  (str(row["a"])) + ', ' + (str(row["c"]))
        
        elif (row.a >= 1) and (row.b == 0) and (row.c == 0):
            return  (str(row["a"])) 
        
        elif (row.a >= 1) and not (row.b == 0) and not (row.b == 9999999) and not (row.c == 9999999):
            return  (str(row["a"])) + ', ' + (str(row["b"])) + ', ' + (str(row["c"]))
        
        else:
            return "Needs Manual Assistance"
    
    #apply function
    df[new_col] = df.apply(lambda x: district_status(x), axis=1)
    
    #replace values back to null
    df = df.replace({'a': 9999999, 'b': 9999999, 'c':9999999}, np.nan)
    
    #rename columns back to original
    df = df.rename(columns = {'a':col_a,
                              'b':col_b,
                              'c':col_c})
  
    return df
    

In [50]:
## test on the subsetted df 
## still using cleaned df
(format_districts(ad, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district2")).sample(20)

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district2
242,"Porterville, City of",26,26,,,26
122,Santa Barbara County,35,0,3.0,5.0,35
407,Solano County,14,14,,,14
89,"Delano, City of",32,0,3.0,2.0,32
135,Transportation Agency for Monterey County,30,30,,,30
418,"Lancaster, City of",36,36,,,36
120,"Fontana, City of",47,47,,,47
253,"Los Angeles, City of","53, 59",53,59.0,,"53, 59"
309,"Paradise, Town of",3,3,,,3
371,Shasta County,1,1,,,1


In [51]:
## check which ones dont fit the arguments 
## still using cleaned df

(format_districts(ad, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district2"))>>filter(_.assembly_district2=="Needs Manual Assistance")

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district2
6,Butte County,"1, 3",1,3,,Needs Manual Assistance
12,"San Bernardio, City of",40,4,0,,Needs Manual Assistance
276,Yolo County,"4, 7",4,7,,Needs Manual Assistance
365,"Lynwood, City of","6, 3",6,3,,Needs Manual Assistance
380,"Vallejo, City of","14, 4",14,4,,Needs Manual Assistance


In [52]:
## test on main df with assembly districts 
df = ((format_districts(df, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district"))
 )
(df>>select(_.a2_assem_dist_a, _.a2_assem_dist_b, _.a2_assem_dist_c, _.assembly_district)).sample(20)

Unnamed: 0,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district
422,7,,,7
710,22,,,22
731,13,,,13
123,44,,,44
404,70,,,70
856,35,,,35
829,30,,,30
29,69,,,69
254,13,,,13
660,5,,,5


In [53]:
## apply function for the other two columns: congressional district and senate district

df = (format_districts(df, "a2_congress_dist_a", "a2_congress_dist_b", "a2_congress_dist_c", "congressional_district"))
df = (format_districts(df, "a2_senate_dist_a", "a2_senate_dist_b", "a2_senatedistc", "senate_district"))

In [54]:
df.sample(5)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
502,Palm Desert,08-5414S21,Yes,"Palm Desert, City of",08-5414S21,73-510 Fred Waring Drive,Deputy Director of Public Works,92260,,,No,,42,,,36,,,Riverside,8,Construct Class IV Bicycle/Small Electric Vehi...,"Located in the middle of the Coachella Valley,...",Palm Desert Bicycle/Low Speed Electric Vehicle...,Project is located within one of the ten large...,SCAG,No,0,,,The City of Palm Desert is developing a bicycl...,,,28,,,,No,,No,,No,,No,Infrastructure + NI - Small,Yes,100,0,No,0,No,0,,0,No,1,3373,,ATP Cycle 6 Signature Page.pdf,Project Photos.pdf,Palm Desert Phase 1 Plans with existing.pdf,Eng Check List _PD.pdf,,Combined Letters.pdf,,Attachment-G-Exhibit-25-R-NI-Work-Plan filled.pdf,Attachment-F-Project-Estimate PD Link.pdf,,2022-06-14 15:41:27,"8-Palm Desert, City of-1",CYCLE 6,N,5414,0,0,"General Plan Circulation Element, CV Link Conn...","Construct 670 linear feet of Class II, 4310 li...",Yes,2,0,670,4310,6440,,0,0,0,0,0,0,0,,0,,0,0,3,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,Yes,No,No,,0,0,,0,0,5510,0,0,0,0,3373,2022-06-14 15:41:28,N,0,Y,100,N,0,N,0,,,N,,0,0,5,0,5,2,,5,0,0,0,0,0,0,0,1,0,,0,0,,0,,Y,Y,Y,Y,Y,N,,English and Spanish,Y,Y,Y,Y,Y,N,,N,N,N,N,No,N,both,42,36,28
103,Sacramento,035924f15,Yes,Sacramento County,00090s,4111 Branch Center Road,Principal Civil Engineer,95827,,,No,,7,,,6,,,Sacramento,3,"Construct 12 curb extensions, 26 crosswalks, 3...",The projects are located in unincorporated Sou...,South Sacramento County Safe Routes to School ...,Project is located within one of the ten large...,SACOG,Yes,1,38.51,-121.46,The South Sacramento County Safe Routes to Sch...,,,6,,,,No,,Yes,,Yes,,No,Infrastructure + NI - Small,No,0,3,Yes,100,Yes,0,,0,No,2,2035,,Attachment A_South Sac SRTS.pdf,ATTACHMENT E - EXISTING CONDITIONS PHOTOS (FIN...,ATTACHMENT D - Existing and Proposed Condition...,Attachment-B-Engr-Checklists.pdf,,ATTACHMENT I - Letters of Support BINDER (FINA...,,Attachment-G-Ex-22-R_v4.xlsx,ATTACHMENT F - Engineer's Estimate - R03.pdf,,2020-09-18 09:52:49,3-Sacramento County-2,CYCLE 5,N,5924,No,No,,"Construct 12 curb extensions, 26 crosswalks, 3...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,90,0,0,0,0,33,0,0,0,Accessible Bus Stop,1,hard median,1,0,0,1,25,0,1,0,0,0,,0,0.0,0,12,0,255,No,No,Yes,Speed Hump,0,2,,0,0,0,0,0,0,0,2035,2020-09-18 09:52:49,N,0,N,0,Y,100,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,6,3,6,6,0,0,0,9,0.0,3,6,Mobile Bicycle Repair Clinics,3,Community directed hot spot mapping meetings,Y,N,N,Y,N,Y,"Yard Signs, Flyers",Spanish,N,N,Y,Y,N,N,,N,N,N,N,No,N,both,7,6,6
257,Cathedral City,08-5430F15,Yes,"Cathedral City, City of",00263S,68700 Ave Lalo Guerrero,Director of Engineering/Public Works,92234,,,No,,56,,,36,,,Riverside,8,"The project constructs bike lanes, missing sid...",The project is located in Cathedral City along...,Downtown Cathedral City Connectors: Gap Closur...,Project is located within one of the ten large...,SCAG,Yes,3,33.78,-116.47,The City of Cathedral City intends to implemen...,,,28,,,,Yes,,No,,No,,No,Infrastructure - Medium,Yes,70,0,Yes,30,No,0,No,0,Yes,1,2246,Attachment K_AdditionalAttachments.pdf,Attachment A_Signature Page_executed.pdf,Attachment E_ExistingConditions.pdf,Attachment D_ProjectMap_Plans.pdf,Attachment B_Engr Checklist_executed.pdf,,Attachment I_Letters of Support_ALL.pdf,,,Attachment F_Project Estimate.pdf,,2020-09-15 11:16:06,"8-Cathedral City, City of-1",CYCLE 5,N,5430,0,0,,Directly benefits locally disadvantaged commun...,Yes,0,0,18760,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,3450,0,0,,0,,0,0,0,81,0,0,0,1,13,0,3510,0,Pedestrian Hybrid Beacon,1,New Crosswalk (unsignalized),7,1,820,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,2,0,,0,0,0,0,0,0,0,2246,2020-09-15 11:16:06,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,56,36,28
187,Riverside,08-5956R,Yes,Riverside County,00010S,"4065 County Circle Drive, P.O. Box 7600",Branch Chief for Injury Prevention Services,92507,,,No,,42,,,36,,,Riverside,8,Safe Routes for All Program-San Jacinto: Pre-K...,San Jacinto is located in western Riverside Co...,Riverside County Safe Routes for All - San Jac...,Project is located within one of the ten large...,SCAG,Yes,2,33.78,-116.96,The Safe Routes for All - San Jacinto Program ...,,,23,,,,No,,No,,No,,No,Non-Infrastructure,Yes,30,5,Yes,70,Yes,0,,0,No,10,2455,K. Additional Attachments.pdf,A. Signature Page.pdf,E. Existing Conditions.pdf,,,,I. Letters of Support.pdf,,G. Exhibit 22-R.pdf,,,2020-09-15 16:35:42,8-Riverside County-10,CYCLE 5,N,5956,0,0,WRCOG Active Transportation Plan,Improve pedestrian and bicycle safety among st...,No,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,No,,0,0,,0,0,0,0,0,0,0,2455,2020-09-15 16:35:42,N,0,Y,60,Y,40,N,0,,0.0,N,,0,8,3,3,5,1,0.0,24,3,0,5,3,9,5,0,4,320,0.0,4,7,"campaigns (5 elementary school based, 1 colleg...",7,evaluation methods,Y,Y,Y,Y,Y,Y,"Presentations to older adults, parents, commun...",Spanish; All communication activities are bili...,Y,Y,Y,Y,Y,Y,"Western Riverside Council of Governments, City...",N,N,N,N,No,N,both,42,36,23
364,Morro Bay,05-5391 R,Yes,"Morro Bay, City of",05-000389,595 Harbor Street,City Engineer,93442,,,No,,0,3.0,5.0,0,2.0,4.0,San Luis Obispo,5,"In Morro Bay, at the SR1/SR41 interchange, con...","In the City of Morro Bay, California, at the i...",SR 1/SR 41 Interchange Operational Improvements,Project is located outside one of the ten larg...,SLOCOG,Yes,1,35.38,-120.86,Construction of a six leg roundabout at the SR...,,,0,1.0,7.0,,No,,Yes,,Yes,,No,Infrastructure - Large,Yes,50,2,Yes,50,Yes,0,,0,No,1,2516,ATP Attachment K for Application.pdf,ATP Attachment A for Application.pdf,ATP Attachment E for Application.pdf,ATP Attachment D for Applicaiton.pdf,ATP Attachment B Location Map.pdf,,ATP Attachment I for Application.pdf,,,ATP Attachment F for Application.pdf,,2020-09-18 11:41:33,"5-Morro Bay, City of-1",CYCLE 5,N,5391,No,Yes,Morro Bay GP/LCP Circulation Element Draft Update,Construction of a 6 leg roundabout will provid...,Yes,0,800,650,860,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,75,0,9,0,0,2,800,0,800,,0,,0,8,270,0,0,0,0,0,0,0,,0,1.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,1,2516,2020-09-18 11:41:34,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,35,24,17


### Export entires that need manual assistance

In [55]:
assem_manual = df>>filter(_.assembly_district=="Needs Manual Assistance")

In [56]:
congr_manual =  df>>filter(_.congressional_district=="Needs Manual Assistance")

In [57]:
senate_manual =  df>>filter(_.senate_district=="Needs Manual Assistance")

In [58]:
needs_assistance = pd.concat([senate_manual, congr_manual], ignore_index=True)

In [59]:
needs_assistance = pd.concat([needs_assistance, assem_manual], ignore_index=True)

In [60]:
needs_assistance = needs_assistance.drop_duplicates()

In [61]:
needs_assistance.sample()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
7,Oakland,04-6480F15,Yes,Alameda County Transportation Commission,00526S,"1111 Broadway, Suite 800",Deputy Executive Director of Projects,94607,ATT_PA1.pdf,"Cities of Oakland, San Leandro, Hayward, Alame...",Yes,"Director of Transportation, City Manager, City...",18,20,,13,15,,Alameda,4,PS&E for a 16-mile Class I and Class IV facili...,Along BART and Oakland Subdivision UPRR alignm...,East Bay Greenway,Project is located within one of the ten large...,MTC,Yes,1,37.68,-122.11,The East Bay Greenway is a proposed 37-mile lo...,,,9,10,,,No,,Yes,,Yes,,No,Infrastructure - Large,Yes,67,0,Yes,33,No,0,,0,No,1,2574,ATT_K.pdf,Att_A.pdf,ATT_E.pdf,Att_D.pdf,Att_B.pdf,,Att_I.pdf,,,Att_F.pdf,,2020-09-15 20:19:02,4-Alameda County Transportation Commission-1,CYCLE 5,N,6480,0,0,,Construction of a 3.5 mile Class IV cycle trac...,Yes,5,69305,0,0,14256,,0,13,0,0,0,0,0,,0,,0,69305,0,9,0,,0,0,0,0,,0,,0,0,0,91,0,0,0,0,107,0,0,1900,,0,,0,59,4800,49,30,0,4,0,0,0,,0,0.0,0,0,0,0,No,Yes,Yes,Remove Left-Turn Pocket,0,6,,0,1,2250,0,9,0,0,2574,2020-09-15 20:19:02,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,"18, 20","13, 15",Needs Manual Assistance


In [62]:
#needs_assistance.to_excel(f"{GCS_FILE_PATH}needs_assistance/needs_assistance_districts.xlsx")

In [63]:
print(len(congr_manual))
print(len(senate_manual))
print(len(assem_manual))

12
16
13


In [64]:
def export_district_need_assistance(df):
    #export the issues to folder for now so that we can know which entries to check
    ## get the rows that need assistance
    assem_manual = df>>filter(_.assembly_district=="Needs Manual Assistance")
    congr_manual =  df>>filter(_.congressional_district=="Needs Manual Assistance")
    senate_manual =  df>>filter(_.senate_district=="Needs Manual Assistance")
    
    ## concat them into one df 
    needs_assistance = pd.concat([senate_manual, congr_manual], ignore_index=True)
    needs_assistance = pd.concat([needs_assistance, assem_manual], ignore_index=True)
    
    ## drop duplicat entries
    needs_assistance = needs_assistance.drop_duplicates()
    
    needs_assistance = (needs_assistance>>select(_.a1_imp_agcy_city,  _.a1_imp_agcy_name, _.a1_imp_agcy_zip,
                                                 _.a1_imp_agcy_fed_ma_num, _.a1_imp_agcy_state_ma_num,
                                                 _.a2_assem_dist_a, _.a2_assem_dist_b, _.a2_assem_dist_c,
                                                 _.a2_congress_dist_a, _.a2_congress_dist_b, _.a2_congress_dist_c, 
                                                 _.a2_senate_dist_a, _.a2_senate_dist_b, _.a2_senatedistc,
                                                 _.assembly_district, _.congressional_district, _.senate_district))
    
    ## export to internal GCS bucket (can change)
    needs_assistance.to_excel(f"{GCS_FILE_PATH}needs_assistance/needs_assistance_districts.xlsx")
    

In [65]:
export_district_need_assistance(df)

## Change 0 values to Null

In [66]:
df_zero = df.loc[:, df.eq(0).any()]

# df[, 12:18][df[, 12:18] == 0] <- NA



In [67]:
df_zero_list = df_zero.columns.to_list()

In [68]:
## note: we might want to take out the assembly, congress and senate districts from this list

In [69]:
df_zero_list

['a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_past_proj_qty',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_pct',
 'a3_trail_elig_cost',
 'a3_trail_trans_pct',
 'agency_app_num',
 'b_sig_inter_new_bike_boxes',
 'b_class_1',
 'b_class_2',
 'b_class_3',
 'b_class_4',
 'b_light_intersection',
 'b_mid_block_new_rrfb_signal',
 'b_mid_block_surf_improv',
 'b_bsp_new_bikes',
 'b_bike_new_secured_lockers',
 'b_bike_new_racks',
 'b_bsp_new_station',
 'b_other_bike_improv_qty_1',
 'b_other_bike_improv_qty_2',
 'b_light_rdwy_seg',
 'b_sig_inter_timing_improv',
 'b_un_sig_new_rrfb_signal',
 'b_un_sig_cross_surf_improv',
 'm_cls_1_trails_widen_recon_exist',
 'm_cls_1_trails_new__less_than_9',
 'm_cls_1_trails_new_over_9',
 'm_non_cls_trails_new',
 'm_other_trail_improv_qty_1',
 'm_other_trail_improv_qty_2',
 'm_non_cls_widen_recon_exist',
 'p

In [70]:
#df[df_zero_list] = df[df_zero_list].replace({'0':np.nan, 0:np.nan})

In [71]:
def convert_zeros_to_nan(df):
    df_zero = df.loc[:, df.eq(0).any()]
    df_zero.drop(['a2_assem_dist_b','a2_assem_dist_c', 'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_senate_dist_b', 'a2_senatedistc',
              'a2_past_proj_qty', 'a3_st_num_schools', 'agency_app_num',
             'a3_st_ped_pct', 'a3_trail_trans_pct', 'a4_ped_gap_pct',  'a4_reg_init_pct', 'a4_com_init_pct',
              'a4_safe_route_pct', 'a4_fl_mile_pct', 'a4_emp_based_pct', 'a4_other_ni_pct'
             ], axis=1, inplace=True)
    df_zero_list = df_zero.columns.to_list()
    df[df_zero_list] = df[df_zero_list].replace({'0':np.nan, 0:np.nan})
    
    return df

In [72]:
df = convert_zeros_to_nan(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [73]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20.0,1,Yes,80,Yes,,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,1500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,6.0,,1500.0,,,,,,5.0,,4.0,3.0,,,,,,,,,,,,,No,No,Yes,,,,,,,,,,,,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,21,16,12
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50.0,0,Yes,50,No,,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,6336.0,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,100,,,,,,,,,,,,,38.0,,15.0,16.0,,18.0,3.0,,,,1.0,6.0,,18.0,,,Yes,No,No,,,,,,,8800.0,,,,,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50.0,2,Yes,50,No,,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,13752.0,5748.0,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,40,2.0,,,,20.0,,,,,,,,9.0,,,,,,,,,,,,,,,,Yes,No,No,,,,,,,,,,,,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,22,14,13
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,60.0,,,,Left Turn Arrow,3.0,Enhanced Crosswalk Unsignalized,3.0,218.0,1000.0,7.0,,,1.0,,,,,,,,7.0,,,Yes,No,No,,,,,,,,,,,,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,43.0,,,,Enhance crosswalk (unsignalized),7.0,Raised Crosswalk,2.0,189.0,3455.0,5.0,,,1.0,,,,,,,2.0,5.0,,,Yes,No,No,,,,,,,,,,,,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34


## Null columns

In [74]:
# finding columns with all null and dropping for now. will keep out of script

In [75]:
alldatanull = alldata.columns[alldata.isna().all()].tolist()

In [76]:
alldatanull

['#',
 'atp_id',
 'ppno',
 'ppno_1',
 'a2_project_location_map',
 'a3_plan_active_trans',
 'a3_plan_bicycle',
 'a3_plan_ped',
 'a3_plan_srts',
 'a4_bike_gap_pct',
 'a4_easement_support',
 'a4_emp_based']

In [77]:
alldata = alldata.drop(columns=alldatanull)

In [78]:
dfnull = df.columns[df.isna().all()].tolist()
df = df.drop(columns=dfnull)

In [79]:
dfnull

['a2_project_location_map',
 'a3_plan_active_trans',
 'a3_plan_bicycle',
 'a3_plan_ped',
 'a3_plan_srts',
 'a3_trail_elig_cost',
 'attch_exhibit22_plan',
 'attch_link',
 'completed_pdf_form',
 'a4_bike_gap_pct',
 'a4_easement_support',
 'a4_emp_based',
 'a4_le_methods']

In [80]:
cleanednull = cleaned.columns[cleaned.isna().all()].tolist()
cleaned = cleaned.drop(columns=cleanednull)

## Changing Column Types

In [81]:
df.a2_mpo.value_counts()

SCAG        315
MTC         132
SANDAG       62
Caltrans     52
SACOG        52
SJCOG        38
TCAG         37
AMBAG        34
KCOG         31
COFCG        31
SBCAG        19
SLOCOG       15
BCAG         14
TMPO         13
SRTA         11
StanCOG      11
MCTC          7
KCAG          3
MCAG          3
CVAG          2
Name: a2_mpo, dtype: int64

In [82]:
df.details_datetime_stamp.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: details_datetime_stamp
Non-Null Count  Dtype         
--------------  -----         
882 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 13.8 KB


In [83]:
compare_col = np.where(
    df["main_datetime_stamp"] == df["details_datetime_stamp"], True, False
)
df["compare_datetime"] = compare_col
df.compare_datetime.value_counts()

True     763
False    119
Name: compare_datetime, dtype: int64

In [84]:
# some datetimes are seconds different others are larger
(
    df
    >> filter(_.compare_datetime == False)
    >> select(_.details_datetime_stamp, _.main_datetime_stamp)
)

Unnamed: 0,details_datetime_stamp,main_datetime_stamp
27,2020-09-10 16:23:42,2020-09-10 16:23:41
37,2020-09-11 12:48:11,2020-09-11 12:48:10
55,2020-09-10 14:14:03,2020-09-10 14:14:02
67,2020-09-14 19:20:56,2020-09-14 19:20:55
75,2020-09-11 16:29:11,2020-09-11 16:29:10
...,...,...
869,2022-06-16 12:10:18,2022-06-21 11:28:23
871,2022-06-16 12:12:24,2022-06-16 12:12:23
872,2022-06-16 12:15:24,2022-06-16 12:15:23
874,2022-06-16 10:57:37,2022-06-16 10:57:36


In [85]:
df = df.drop(columns="compare_datetime")

In [86]:
df.sample(2)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
638,Pico Rivera,07-5351R,Yes,"Pico Rivera, City of",00194S,6615 Passons Blvd.,City Manager,90660,,,No,,58,,,38,,,Los Angeles,7,Develop a planning document proposing bicycle ...,City of Pico Rivera,Pico Rivera Active Transportation Master Plan,Project is located within one of the ten large...,SCAG,Yes,12,,,"The City of Pico Rivera is a quintessential, a...",,32,,,No,No,No,Yes,Plan,Yes,50.0,0,Yes,50,No,,0,No,1,3806,Att K - Additional Backup Docs.pdf,Att A - Signature Page.pdf,Att E - Photos of Existing Conditions.pdf,,,Att I - Letters of Support.pdf,,,2022-06-15 19:59:09,"7-Pico Rivera, City of-1",CYCLE 6,N,5351,0,0,Urban Greening Plan; General Plan Circulation ...,Develop 1 active transportation plan within a ...,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,No,No,No,,,,,,,,,,,,3806,2022-06-15 19:59:09,N,0,N,0,N,0,N,0,,N,,0,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,Y,N,No,N,both,58,38,32
60,Santa Cruz,05-5025R,Yes,"Santa Cruz, City of",00244S,809 Center St,Senior Engineer,95060,,,No,,29,,,18,20.0,,Santa Cruz,5,Construction of .8 miles of Segment 7 of the R...,Adjacent to the Santa Cruz Branch Rail Line be...,Santa Cruz Rail Trail Segment 7 Phase 2 Constr...,Project is located outside one of the ten larg...,AMBAG,Yes,2,36.96,-122.03,The project will close a .8 mile gap in the Ra...,SCCRTC,17,,,Yes,No,No,Yes,Infrastructure + NI - Large,Yes,50.0,1,Yes,50,Yes,Yes,100,Yes,2,1913,Intentionally Blank.pdf,AttachmentA_signed.pdf,Attachment E_Project Site Photos Rail Trail Se...,AttachmentD_RTS7 100% CON DOCS PHASE 2_7-10-20...,Attachment-B-Engr-Checklist_RTS7P2_EXECUTED.PDF,Attachment I_ LOS Combined.pdf,Attachment-G-Ex-22-R Segment 7.xlsx,Attachment-F-Engineering Estimate_RTS7P2.xlsx,2020-09-15 18:17:09,"5-Santa Cruz, City of-2",CYCLE 5,N,5025,No,No,,Construction of .8 miles of Segment 7 of the R...,Yes,,,,,,,,,,,,,,,,,,,,,,,4172.0,,Wayfinding signage,40.0,Lighting,45.0,,,100,,,,,,,,,,,,,,,,,,,,,38.0,Willow and heritage,,,,,,,No,No,Yes,,,,,,,,,,,,1913,2020-09-15 18:17:09,N,0,N,30,N,70,N,0,0.0,N,,0,,20.0,,,,,24.0,4.0,,24.0,12.0,,,,144.0,,6.0,20.0,Parent education classes on bike and pedestria...,8.0,Group family rides,Y,N,Y,Y,Y,N,,Spanish,Y,Y,Y,Y,Y,N,,N,N,N,N,No,N,both,29,"18, 20",17


In [87]:
df.columns.tolist()

['a1_imp_agcy_city',
 'a1_imp_agcy_fed_ma_num',
 'a1_imp_agcy_ma',
 'a1_imp_agcy_name',
 'a1_imp_agcy_state_ma_num',
 'a1_imp_agcy_street',
 'a1_imp_agcy_title',
 'a1_imp_agcy_zip',
 'a1_letter_of_intent',
 'a1_proj_partner_agcy',
 'a1_proj_partner_exists',
 'a1_proj_partner_title',
 'a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_county',
 'a2_ct_dist',
 'a2_info_proj_descr',
 'a2_info_proj_loc',
 'a2_info_proj_name',
 'a2_mop_uza_population',
 'a2_mpo',
 'a2_past_proj',
 'a2_past_proj_qty',
 'a2_proj_lat',
 'a2_proj_long',
 'a2_proj_scope_summary',
 'a2_rtpa',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_plan_active_trans_exists',
 'a3_plan_bicycle_exists',
 'a3_plan_ped_exists',
 'a3_plan_srts_exists',
 'a3_proj_type',
 'a3_st_bicycle_applies',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_applies',
 'a3_st_ped_pct',
 'a3_st_srts',
 'a3_trail_fed_funding',
 'a3_trail_t

### Add Geometry

In [88]:
from dla_utils import _dla_utils
from shared_utils import geography_utils

In [89]:
gdf = geography_utils.create_point_geometry(
    df, longitude_col="a2_proj_long", latitude_col="a2_proj_lat"
)

In [90]:
gdf.sample(1)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district,geometry
798,Arroyo Grande,MA 05-5199F15,Yes,"Arroyo Grande, City of",MA 00190S,300 E Branch Street,City Engineer,93420,,,No,,35,,,24,,,San Luis Obispo,5,"PA&ED, PS&E, ROW, Construction & NI funding to...","Within the City of Arroyo Grande, along North ...",Halcyon Road Complete Streets Project,Project is located outside one of the large MP...,SLOCOG,No,0,,,The project consists of Complete Streets impro...,,17,,,No,Yes,Yes,Yes,Infrastructure + NI - Medium,Yes,50.0,2,Yes,50,Yes,,0,No,1,3683,CK Additional Attachments.pdf,CA Signature Page.pdf,CE Photos of Existing Conditions.pdf,CD Project Plans.pdf,CB Engineers Checklist.pdf,CI Letters of Support.pdf,CG Non Infrastructure 25-R.pdf,CF Project Estimate.pdf,2022-06-15 16:26:48,"5-Arroyo Grande, City of-1",CYCLE 6,N,5199,0,0,Halcyon Road Complete Streets Plan; Circulatio...,"Construct 1,025 LF of sidewalk, 12,200 LF of b...",Yes,,,12200.0,,,,,,,,,,,,,,,,,875.0,,880.0,,,,,,,,,30,2.0,1.0,,2.0,4.0,,1025.0,,,,,,35.0,,5.0,,,1.0,,,,,,,,,2.0,,No,No,Yes,5400 LF lane narrowing,,1.0,,,,4600.0,1.0,2.0,,,3683,2022-06-15 16:26:48,N,0,Y,25,Y,75,N,0,,N,,0,,,,,,5.0,,2.0,2.0,,,,,,,,,,,,,Y,N,N,Y,Y,N,,Spanish,N,N,N,N,N,N,,N,N,N,N,No,N,both,35,24,17,POINT EMPTY


### Change columns to integers

In [91]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x

In [92]:
columns_to_int = [
    "a1_locode",
    # "a2_senatedistc",
    # "a2_senate_dist_b",
    # "a2_assem_dist_b",
    # "a2_assem_dist_c",
    # "a2_congress_dist_b",
    # "a2_congress_dist_c",
    # "a2_proj_lat",
    # "a2_proj_long",
    # "a2_senate_dist_b",
    # "a2_senatedistc",
    "p_un_sig_inter_new_roundabout",
 #   "a4_emp_based_pct",
#    "a4_le_methods",
    "a4_srts_le",
    "a1_locode",
    "a2_senatedistc",
    "a2_senate_dist_b",
]

In [93]:
# gdf[columns_to_int] = gdf[columns_to_int].apply(get_num)

In [94]:
for col in columns_to_int:
    gdf[col] = gdf[col].apply(get_num)

In [95]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 202 entries, a1_imp_agcy_city to geometry
dtypes: Int64(7), category(1), datetime64[ns](2), float64(83), geometry(1), int64(15), object(93)
memory usage: 1.4+ MB


In [96]:
gdf.select_dtypes("int64")

Unnamed: 0,a1_imp_agcy_zip,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_ct_dist,a2_past_proj_qty,a2_senate_dist_a,a3_st_num_schools,a3_st_ped_pct,a3_trail_trans_pct,agency_app_num,app_pk,a4_ped_gap_pct,app_fk,a4_reg_init_pct,a4_com_init_pct,a4_safe_route_pct,a4_fl_mile_pct,a4_other_ni_pct
0,95340,21,,,16,,,10,0,12,1,80,0,1,1802,0,1802,0,0,0,0,0
1,92702,69,,,46,,,12,2,34,0,50,0,4,1811,100,1811,0,0,0,0,0
2,94044,22,,,14,,,4,0,13,2,50,0,1,1804,40,1804,0,0,0,0,0
3,92702,69,,,46,,,12,2,34,5,100,0,13,1822,50,1822,0,0,0,0,0
4,92702,69,,,46,,,12,4,34,5,100,0,14,1823,50,1823,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,91362,,4,4,,2,6,7,0,,0,5,0,1,3192,5,3192,0,0,0,0,0
878,91733,49,,,32,,,7,5,22,4,25,0,1,3859,0,3859,0,0,0,0,0
879,95113,25,,,19,,,4,1,15,0,40,0,3,3860,2,3860,0,0,0,0,0
880,93101,,3,7,,2,4,5,2,,1,75,0,2,3845,20,3845,50,0,50,0,0


In [97]:
gdf.select_dtypes("object")

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_ped_applies,a3_st_srts,a3_trail_fed_funding,a3_trails,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_other_bike_improv_1,b_other_bike_improv_2,m_other_trail_imprv_1,m_other_trail_imprv_2,p_other_ped_imprv_1,p_other_ped_imprv_2,p_amenities_shade_tree_type,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_other_traffic_calming_imprv_2,a4_reg_init,a4_com_init,a4_safe_route,a4_fl_mile,a4_other_ni,a4_other_ni_descr,a4_act_other_1_descr,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,assembly_district,congressional_district,senate_district
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,,,No,,Merced,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,The Planada Sidewalk Infill Project is located...,,,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,Yes,,No,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,,,,,No,No,Yes,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,21,16,12
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,This project will implement a Class 3 bicycle ...,,,,Yes,Yes,No,Yes,Infrastructure - Medium,Yes,Yes,No,,No,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,,,No,,San Mateo,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,The project will install a combination of Clas...,,,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,No,,No,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,22,14,13
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,,,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,Left Turn Arrow,Enhanced Crosswalk Unsignalized,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,,,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,Enhance crosswalk (unsignalized),Raised Crosswalk,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Thousand Oaks,07-5292F15,Yes,"Thousand Oaks, City of",00252,2100 Thousand Oaks Blvd.,Transportation Planner,,,No,,Ventura,"Construction funding for Class IV bikelanes, ...","In the City of Thousand Oaks, Lynn Road betwee...",Lynn Road Bike Lanes and Pedestrain Improvements,Project is located within one of the ten large...,SCAG,No,The project is located on 4.5-miles of Lynn R...,,2,7,Yes,No,No,No,Infrastructure - Small,Yes,Yes,No,,No,ATTACHMENT K.pdf,Attachement A Signed.pdf,photoskn.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,"7-Thousand Oaks, City of-1",CYCLE 6,N,5392,No,Yes,Local Road Safety Plan,"265' new sidewalk, 2 rapid flashing beacons, 1...",Yes,,,,,,,,No,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,44,26,27
878,South El Monte,,Yes,"South El Monte, City of",07-5352S21,1415 Santa Anita Avenue,Community Development Director,Partner Agency Letter of Intent.pdf,City of El Monte,Yes,City Engineer,Los Angeles,Construct Class II bike lane segments; install...,Merced Avenue from Garvey Avenue to Fern Stree...,Merced Avenue Greenway,Project is located within one of the ten large...,SCAG,Yes,The project will implement bicyclist/pedestria...,,,,No,Yes,No,No,Infrastructure - Small,Yes,Yes,No,,No,Att K - Support Docs.pdf,Att A - Signature Page.pdf,Att E - Photos of Existing Conditions.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,"7-South El Monte, City of-1",CYCLE 6,N,5352,0,0,,Construct 0.97-mile Class II bike path; 4 enha...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,49,32,22
879,San Jose,04-5005F15,Yes,"San Jose, City of",00200S,200 E Santa Clara St,Senior Transportation Specialist,,,No,,Santa Clara,This project will decouple 2nd and 3rd street ...,The project is in SoFA arts district in southw...,2nd & 3rd Street De-Coupling and Complete Stre...,Project is located within one of the ten large...,MTC,Yes,"The City of San José, through its Downtown Tra...",,,,No,Yes,No,No,Infrastructure - Large,Yes,Yes,No,,No,attachment k.pdf,Attachment-A-Signature-Page (1)_jr (1).pdf,Attachment_G_Site_Photos.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,"4-San Jose, City of-3",CYCLE 6,N,5005,0,0,"Emerging mobility Action Plan, Carbon Neutral ...",Project constructs approximately 6840 feet of ...,Yes,Bike Ramps,Raised Intersections,,,Fully Bulbed (all 4 corners),,,Yes,No,No,Conversion of 1 to 2 way operation,<---- 0.68 miles,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,25,19,15
880,Santa Barbara,05-5951R,Yes,Santa Barbara County,00100S,123 E. Anapuma St,Alternative Transportation Manager,,,No,,Santa Barbara,"Curb extensions, sidewalks and crosswalks for ...",Unincorporated neighborhood located south of E...,Isla Vista Bike and Pedestrian Improvements Pr...,Project is located outside one of the large MP...,SBCAG,Yes,"Isla Vista is a place like no other. 15,733 pe...",,1,9,Yes,No,No,No,Infrastructure + NI - Medium,Yes,Yes,Yes,,No,,Attachment A_Signature Page - 2022.pdf,Existing Conditions Photos.pdf,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5-Santa Barbara County-2,CYCLE 6,N,5951,0,0,Regional Transportation Plan,"Curb extensions, sidewalks, and bicycle networ...",Yes,Bike left-hand turn lanes,Class 2 conflict / intersection striping,,,,,,No,No,Yes,,,Y,N,Y,N,N,,,,N,N,Y,Y,N,N,,"Spanish, Mandarin",Y,N,Y,N,N,N,,N,N,N,N,No,N,37,24,19


In [98]:
gdf.select_dtypes("float64")

Unnamed: 0,a2_proj_lat,a2_proj_long,a3_st_bicycle_pct,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_qty_1,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_improv_qty_1,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_qty_1,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_2
0,37.29,120.31,20.00,,,,1500.00,,,,,,,,,,,,,,,,,,,,,,,,,,,6.00,,1500.00,,,,5.00,,4.00,3.00,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
1,33.74,117.86,50.00,,,,6336.00,,,,,,,,,,,,2.00,,,,,,,,,,,,,,,,,,,,,38.00,,15.00,16.00,,18.00,3.00,,,1.00,6.00,,18.00,,,,,,,8800.00,,,,,0.00,,,,,,,,,,,,,,,,,,,
2,37.65,-122.49,50.00,,,13752.00,5748.00,,,,,,,,,,,,,1.00,,,,,,,,,,2.00,,,,20.00,,,,,,9.00,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
3,33.71,117.89,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,60.00,,,,3.00,3.00,218.00,1000.00,7.00,,,1.00,,,,,,,7.00,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
4,33.73,117.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43.00,,,,7.00,2.00,189.00,3455.00,5.00,,,1.00,,,,,,2.00,5.00,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,,,95.00,,,,,24820.00,,,,,,,,,,324.00,2.00,2.00,,,,,,,,,,,,,,,,265.00,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
878,,,75.00,,,5100.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.00,23000.00,4.00,,16.00,,,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
879,,,60.00,9.00,,,,6840.00,,,,,,,,8.00,6.00,,,,4.00,,,,,,,,,,,,,,,,,60.00,,48.00,6840.00,8.00,,,2.00,,,,,,,4.00,4.00,6840.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
880,,,25.00,2.00,,1700.00,22410.00,,,,,,,,,10.00,24.00,,,,,,,,,,,,,1.00,,,1.00,110.00,,2820.00,,,,23.00,,,,,,,,,,,,42.00,17.00,2700.00,,,,,,,,,,,2.00,4.00,4.00,,,,,,4.00,2.00,,,,,,,,,


## Locode Check

In [99]:
gdf.a1_locode.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: a1_locode
Non-Null Count  Dtype 
--------------  ----- 
881 non-null    object
dtypes: object(1)
memory usage: 13.8+ KB


In [100]:
(gdf>>select(_.a1_locode)>>filter(_.a1_locode.isna()))

Unnamed: 0,a1_locode
433,


In [101]:
(gdf.loc[:, gdf.isna().any()])

Unnamed: 0,a1_imp_agcy_fed_ma_num,a1_imp_agcy_state_ma_num,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_info_proj_descr,a2_proj_lat,a2_proj_long,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_st_bicycle_pct,a3_trail_fed_funding,attch_addtl_attachments,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,a1_locode,a3_plan_other_desc,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_other_ni_descr,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_other_descr,a4_comm_language,a4_colab_other_descr
0,10-5939R,00033S,,,,21,,,16,,,"PA&ED, PS&E, and CON funding for construction ...",37.29,120.31,12,,,20.00,,Planada Sidewalk infill ATP cross section 1.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,5939,,No,,,,1500.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.00,,1500.00,,,,,,5.00,,4.00,3.00,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
1,12-5063,00289S,,,,69,,,46,,,Bishop Street Class 3 Bicycle Boulevard with T...,33.74,117.86,34,,,50.00,,Attachment K - Not Applicable.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,5063,,Yes,,,,6336.00,,,,,,,,,,,,,,2.00,,,,,,,,,,,,,,,,,,,,,,,,,38.00,,15.00,16.00,,18.00,3.00,,,,1.00,6.00,,18.00,,,,,,,,,8800.00,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
2,04-5350-F15,,,,,22,,,14,,,CON funding for installing bicycling facilitie...,37.65,-122.49,13,,,50.00,,,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,5350,,Yes,,,13752.00,5748.00,,,,,,,,,,,,,,,1.00,,,,,,,,,,,,2.00,,,,20.00,,,,,,,,9.00,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
3,12-5063,00289S,,,,69,,,46,,,Pedestrian traffic safety improvements for Jef...,33.71,117.89,34,,,,,Attachment K.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,5063,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,60.00,,,,Left Turn Arrow,3.00,Enhanced Crosswalk Unsignalized,3.00,218.00,1000.00,7.00,,,1.00,,,,,,,,7.00,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
4,12-5063,00289S,,,,69,,,46,,,Pedestrian traffic safety improvements for La...,33.73,117.87,34,,,,,Attachment K.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,5063,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43.00,,,,Enhance crosswalk (unsignalized),7.00,Raised Crosswalk,2.00,189.00,3455.00,5.00,,,1.00,,,,,,,2.00,5.00,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,07-5292F15,00252,,,,,4,4,,2,6,"Construction funding for Class IV bikelanes, ...",,,,2,7,95.00,,ATTACHMENT K.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,5392,Local Road Safety Plan,Yes,,,,,24820.00,,,,,,,,,,,,324.00,2.00,2.00,,,,,,,,,,,,,,,,,,265.00,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
878,,07-5352S21,Partner Agency Letter of Intent.pdf,City of El Monte,City Engineer,49,,,32,,,Construct Class II bike lane segments; install...,,,22,,,75.00,,Att K - Support Docs.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,5352,,Yes,,,5100.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.00,23000.00,4.00,,16.00,,,,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
879,04-5005F15,00200S,,,,25,,,19,,,This project will decouple 2nd and 3rd street ...,,,15,,,60.00,,attachment k.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,5005,"Emerging mobility Action Plan, Carbon Neutral ...",Yes,9.00,,,,6840.00,,,,,,,,Bike Ramps,8.00,Raised Intersections,6.00,,,,4.00,,,,,,,,,,,,,,,,,,,Fully Bulbed (all 4 corners),60.00,,,48.00,6840.00,8.00,,,2.00,,,,,,,,4.00,4.00,6840.00,Conversion of 1 to 2 way operation,,,<---- 0.68 miles,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
880,05-5951R,00100S,,,,,3,7,,2,4,"Curb extensions, sidewalks and crosswalks for ...",,,,1,9,25.00,,,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5951,Regional Transportation Plan,Yes,2.00,,1700.00,22410.00,,,,,,,,,Bike left-hand turn lanes,10.00,Class 2 conflict / intersection striping,24.00,,,,,,,,,,,,,,,1.00,,,1.00,110.00,,2820.00,,,,,,23.00,,,,,,,,,,,,,42.00,17.00,2700.00,,,,,,,,,,,,,,2.00,4.00,4.00,,,,,,4.00,2.00,,,,,,,,,,,,,"Spanish, Mandarin",


In [102]:
(cleaned[pd.to_numeric(cleaned['a1_locode'], errors='coerce').isnull()])>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,MEN,Round Valley Indians Tribe,
91,Orleans,SIS,Karuk Tribe,
160,Biggs,BUT,"Biggs, City of",
204,Hawaiian Gardens,LA,"Hawaiian Gardens, City of",
269,San Francisco,ALA,Bay Area Toll Authority,
324,Redding,TEH,Department of Transportation,
395,Los Angeles,LA,California Department of Transportation,
421,Fremont,ALA,"Fremont, City of",
439,Salinas,MON,Monterey County,


In [103]:
(gdf[pd.to_numeric(gdf['a1_locode'], errors='coerce').isnull()])>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,Mendocino,Round Valley Indians Tribe,
91,Orleans,Siskiyou,Karuk Tribe,
159,Biggs,Butte,"Biggs, City of",
202,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",
265,San Francisco,Alameda,Bay Area Toll Authority,
319,Redding,Tehama,Department of Transportation,
387,Los Angeles,Los Angeles,California Department of Transportation,
413,Fremont,Alameda,"Fremont, City of",
433,Salinas,Monterey,Monterey County,
636,Nice,Lake,Robinson Rancheria,


In [104]:
gdf>>filter(_.a1_locode.isnull())>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
433,Salinas,Monterey,Monterey County,


In [105]:
gdf>>filter(_.a1_locode==('None'))>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,Mendocino,Round Valley Indians Tribe,
91,Orleans,Siskiyou,Karuk Tribe,
159,Biggs,Butte,"Biggs, City of",
202,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",
265,San Francisco,Alameda,Bay Area Toll Authority,
319,Redding,Tehama,Department of Transportation,
387,Los Angeles,Los Angeles,California Department of Transportation,
413,Fremont,Alameda,"Fremont, City of",
636,Nice,Lake,Robinson Rancheria,
742,Anza,Riverside,Cahuilla Band of Indians,


In [106]:
gdf = gdf.replace({'a1_locode': 'None'}, np.nan)

In [107]:
gdf = gdf.astype({'a1_locode':'Int64'})

In [108]:
gdf>>filter(_.a1_locode.isnull())>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,Mendocino,Round Valley Indians Tribe,
91,Orleans,Siskiyou,Karuk Tribe,
159,Biggs,Butte,"Biggs, City of",
202,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",
265,San Francisco,Alameda,Bay Area Toll Authority,
319,Redding,Tehama,Department of Transportation,
387,Los Angeles,Los Angeles,California Department of Transportation,
413,Fremont,Alameda,"Fremont, City of",
433,Salinas,Monterey,Monterey County,
636,Nice,Lake,Robinson Rancheria,


In [109]:
# for the agencies with no locode, we could coerce the errors to return a "None" value

### Connecting DLA Locodes to application data

* to check- if new locodes are added after ATP project selection

In [110]:
locodes = to_snakecase(
    pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/locodes_updated7122021.xlsx")
)

In [111]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x  


In [112]:
def check_counties(df):
    
    #subsetting df to just city, countyx, name and locode
    check_locode = gdf>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)
    check_locode['a1_locode'] = check_locode['a1_locode'].apply(get_num)
    
    #merge official locodes and subset
    (check_locode.merge(locodes, left_on='a1_imp_agcy_name', right_on='agency_name', how = 'outer', indicator = True))
    
    #look at those that just matched
    both = (check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))>>filter(_._merge == 'both')
    both['agency_locode'] = both['agency_locode'].astype('int')
    
    #add county to county names
    both['a2_county'] = both['a2_county'].astype(str) + ' County'  
    
    #add compare column to compare county names
    compare_names = np.where(both["a2_county"] == both["county_name"], True, False)
    both["compare_county"] = compare_names
    
    #get those where compare_county is false
    no_county_match = (both>>filter(_.compare_county==False))

    no_county_match = no_county_match.drop(columns = ['_merge','active_e76s______7_12_2021_',
                                                      'rtpa_name','mpo_name','mpo_locode_fads'])
    
    no_county_match = no_county_match.rename(columns={'compare_county':'county_match_on_name',
                                                         'agency_locode':'locode_list_agency_locode',
                                                         'agency_name':'locode_list_agency_name',
                                                         'district':'locode_list_district',
                                                         'county_name':'locode_list_county_name'})
    
    #export failed matched to gcs
    no_county_match.to_excel(f"{GCS_FILE_PATH}needs_assistance/failed_locode_county_check.xlsx")

In [113]:
check_counties(gdf)

In [114]:
check_locode = gdf>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

In [115]:
check_locode['a1_locode'] = check_locode['a1_locode'].apply(get_num)

In [116]:
#(check_locode.merge(locodes, left_on='a1_imp_agcy_name', right_on='agency_name', how = 'outer', indicator = True))._merge.value_counts()

In [117]:
#(check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))._merge.value_counts()

In [118]:
#both = (check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))>>filter(_._merge == 'both')

In [119]:
#both['a2_county'] = both['a2_county'].astype(str) + ' County'  

In [120]:
# compare_names = np.where(both["a2_county"] == both["county_name"], True, False)

# both["compare_county"] = compare_names

In [121]:
# both.compare_county.value_counts()

In [122]:
# both['agency_locode'] = both['agency_locode'].astype('int')

In [123]:
# both>>filter(_.compare_county==False)

### Export Name Matches that failed County Name Check
* in function

In [124]:
# no_county_match = (both>>filter(_.compare_county==False))

In [125]:
# no_county_match.to_excel(f"{GCS_FILE_PATH}needs_assistance/needs_assistance_locode_county_check.xlsx")

### Remaining Columns

In [126]:
remaining_locodes = ((check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))
    >>filter(_._merge== 'left_only'))

In [127]:
remaining_locodes = remaining_locodes.drop(columns=[
                                                   'agency_name',
                                                   'district',
                                                   'county_name',
                                                   'rtpa_name',
                                                   'mpo_name',
                                                   'mpo_locode_fads',
                                                   'active_e76s______7_12_2021_'])

In [128]:
## does not work
# compare_names = np.where(remaining_locodes["a1_imp_agcy_name"] == locodes["agency_name"], True, False)
# remaining_locodes["compare_names"] = compare_names

### Match using Fuzzy Matcher

* fuzzymatcher gives low scores for some potential matches

In [129]:
#!pip install fuzzymatcher

In [130]:
# import fuzzymatcher

In [131]:
# matching = fuzzymatcher.fuzzy_left_join(remaining_locodes, locodes, 'a1_imp_agcy_name', 'agency_name')

In [132]:
# matching>>arrange(-_.best_match_score)

### Match using isin

In [133]:
#matching['isin'] = matching.names.isin(matching.names)

In [134]:
# matching['isin'] = matching['agency_name'].isin(matching['a1_imp_agcy_name'])

In [135]:
# matching

### Match using Str Contains
* code help:  https://stackoverflow.com/questions/48631769/pandas-str-contains-search-for-multiple-values-in-a-string-and-print-the-value
* merge those that have str contains: https://towardsdatascience.com/joining-dataframes-by-substring-match-with-python-pandas-8fcde5b03933

In [136]:
# get list of column values from the lOfficial list of Locodes
#names = matching['agency_name'].tolist()

In [137]:
# pattern = '|'.join(names)

# matching['contains'] = matching['a1_imp_agcy_name'].str.contains(pattern, case=False)

### Export potential that match with Str Contains

In [138]:
def find_potential_locode_matches(df):
    
    locodes = to_snakecase(pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/locodes_updated7122021.xlsx"))
    
    check_locode = df>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)
    check_locode['a1_locode'] = check_locode['a1_locode'].apply(get_num)

    remaining_locodes = ((check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))
    >>filter(_._merge== 'left_only'))
    
    remaining_locodes = remaining_locodes.drop(columns=[
        'agency_name',
        'district', 'county_name',
        'rtpa_name', 'mpo_name',
        'mpo_locode_fads', 'active_e76s______7_12_2021_'])
    
    #convert locodes to list of names
    names_list = locodes['agency_name'].tolist()
    pattern_names = '|'.join(names_list)
    
    #using str contains to see if there are any potnential matches with the official dla locodes
    remaining_locodes['contains'] = remaining_locodes['a1_imp_agcy_name'].str.contains(pattern_names, case=False)
    
    #get list of those with no matches
    no_match = remaining_locodes >>filter(_.contains==False)
    no_match.to_excel(f"{GCS_FILE_PATH}needs_assistance/no_match_locodes.xlsx")
    
    #get list of those with potential matches
    matches = (remaining_locodes>>filter(_.contains==True))
    matches['join'] = 1
    locodes['join'] = 1

    df_full = matches.merge(locodes, on='join').drop('join', axis=1)
    
    locodes.drop('join', axis=1, inplace=True)
    
    df_full['match'] = df_full.apply(lambda x: x.agency_name.find(x.a1_imp_agcy_city), axis=1).ge(0)    
    potential_matches = (df_full>>filter(_.match==True))
    potential_matches = potential_matches.drop(columns = ['agency_locode_x', '_merge',
                                                          'active_e76s______7_12_2021_',
                                                          'rtpa_name','mpo_name',
                                                          'mpo_locode_fads', 'match'])
    
    potential_matches = potential_matches.rename(columns={'contains':'potential_match',
                                                         'agency_locode_y':'locode_list_agency_locode',
                                                         'agency_name':'locode_list_agency_name',
                                                         'district':'locode_list_district',
                                                         'county_name':'locode_list_county_name'})
    
    # export potential match list
    potential_matches.to_excel(f"{GCS_FILE_PATH}needs_assistance/needs_assistance_potential_match_locodes.xlsx")

    return potential_matches

In [139]:
(find_potential_locode_matches(gdf))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode,potential_match,locode_list_agency_locode,locode_list_agency_name,locode_list_district,locode_list_county_name
87,Biggs,Butte,"Biggs, City of",,True,5128,Biggs,3,Butte County
1688,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",,True,5387,Hawaiian Gardens,7,Los Angeles County
2388,Fremont,Alameda,"Fremont, City of",,True,5322,Fremont,4,Alameda County
3556,Salinas,Monterey,Monterey County,,True,5045,Salinas,5,Monterey County
3557,Salinas,Monterey,Monterey County,,True,6010,Salinas City Line,5,Monterey County
3570,Salinas,Monterey,Monterey County,,True,6011,Monterey Salinas Transit,5,Monterey County
4723,Pomona,Los Angeles,"California State Polytechnic University, Pomon...",,True,6339,Pomona Valley Transportation Authority,7,Los Angeles County
4784,Pomona,Los Angeles,"California State Polytechnic University, Pomon...",,True,5070,Pomona,7,Los Angeles County


In [140]:
#names_list = locodes['agency_name'].tolist()

In [141]:
#pattern_names = '|'.join(names_list)

In [142]:
#remaining_locodes['contains'] = remaining_locodes['a1_imp_agcy_name'].str.contains(pattern_names, case=False)

In [143]:
#remaining_locodes

In [144]:
#matches = remaining_locodes>>filter(_.contains==True)

In [145]:
#matches

In [146]:
# merging matches from remaining locodes and official locodes on str.contains 

In [147]:
# matches['join'] = 1
# locodes['join'] = 1

# dfFull = matches.merge(locodes, on='join').drop('join', axis=1)
# locodes.drop('join', axis=1, inplace=True)

In [148]:
# dfFull

In [149]:
# dfFull['match'] = dfFull.apply(lambda x: x.agency_name.find(x.a1_imp_agcy_city), axis=1).ge(0)

In [150]:
# dfFull

In [151]:
# dfFull>>filter(_.match==True)