# ATP Data Exploration

In [1]:
import intake

import numpy as np
import pandas as pd

from calitp import to_snakecase

from dla_utils import _dla_utils

from shared_utils import altair_utils, styleguide
from siuba import *



In [2]:
pd.set_option("display.max_columns", 220)

## Reading in w/o utils

In [3]:
main_details = to_snakecase(
    pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/atp/Main Details.xls")
)
project_details = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Project Details.xls"
    )
)

In [4]:
# main_details.info()

In [5]:
# project_details.project_cycle.value_counts()

In [6]:
# project_details>>count(_.project_app_id)>>filter(_.n>1)

In [7]:
# project_details>>group_by(_.project_cycle)>>count(_.project_app_id)>>filter(_.n>1)

* multiple project ids, but no duplicates by project cycle. 

In [8]:
## merging

In [9]:
# df = pd.merge(main_details, project_details, how="outer", on=["project_app_id", "project_cycle"], indicator='matches')

In [10]:
# (df>>filter(_.project_app_id =='1-Mendocino Council of Governments-1')>>select(_.project_app_id,
#                                                                               _.project_cycle,
#                                                                              _.matches,
#                                                                               _.agency_app_num))

In [11]:
# df.info()

### Comparing column names

code help: https://stackoverflow.com/questions/45482755/compare-headers-of-dataframes-in-pandas

In [12]:
# columns in both dfs
main_details.columns.intersection(project_details.columns)

Index(['project_app_id', 'project_cycle', 'awarded'], dtype='object')

In [13]:
# columns in main_details not in project_details
main_details.columns.difference(project_details.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_contact', 'a1_imp_agcy_email',
       'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma', 'a1_imp_agcy_name',
       'a1_imp_agcy_phone', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_letter_of_intent',
       'a1_locode', 'a1_proj_partner_agcy', 'a1_proj_partner_contact',
       'a1_proj_partner_email', 'a1_proj_partner_exists',
       'a1_proj_partner_phone', 'a1_proj_partner_title', 'a2_assem_dist_a',
       'a2_assem_dist_b', 'a2_assem_dist_c', 'a2_congress_dist_a',
       'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_county', 'a2_ct_dist',
       'a2_info_proj_descr', 'a2_info_proj_loc', 'a2_info_proj_name',
       'a2_mop_uza_population', 'a2_mpo', 'a2_output_outcome', 'a2_past_proj',
       'a2_past_proj_qty', 'a2_proj_lat', 'a2_proj_long',
       'a2_proj_scope_summary', 'a2_project_location_map', 'a2_rtpa',
       'a2_senate_dist_a', 'a2_senate_dist_b', 'a2_senatedistc',
       'a3_current

In [14]:
# columns in project_details not in main_details
project_details.columns.difference(main_details.columns)

Index(['a4_act_other_1', 'a4_act_other_1_descr', 'a4_act_other_2',
       'a4_act_other_2_decr', 'a4_after_school', 'a4_bike_classes',
       'a4_bike_gap_pct', 'a4_bike_rodeos', 'a4_bike_train', 'a4_classrooms',
       ...
       'v_other_traffic_calming_imprv_2', 'v_other_traffic_calming_qty_1',
       'v_other_traffic_calming_qty_2', 'v_remove_right_turn_pocket',
       'v_remove_travel_ln', 'v_sig_inter_new_roundabout',
       'v_sig_inter_timing_improv', 'v_speed_feedback_signs',
       'v_un_sig_inter_new_roundabout', 'v_un_sig_inter_new_traf_sig'],
      dtype='object', length=132)

## Reading in w/ utils

In [15]:
import utils

In [16]:
df = utils.read_in_data()

In [17]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20,1,Yes,80,Yes,0,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,0,0,0,1500,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,1500,0,,0,,0,5,0,4,3,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50,0,Yes,50,No,0,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,0,0,0,6336,0,,0,0,0,0,0,0,0,,0,,0,0,2,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,0,0,0,0,,0,,0,38,0,15,16,0,18,3,0,0,,1,6.0,0,18,0,0,Yes,No,No,,0,0,,0,0,8800,0,0,0,0,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50,2,Yes,50,No,0,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,0,0,13752,5748,0,,0,0,0,0,0,0,0,,0,,0,0,0,1,0,,0,0,0,0,,0,,0,0,0,40,2,0,0,0,20,0,0,0,,0,,0,9,0,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,60,0,0,0,Left Turn Arrow,3,Enhanced Crosswalk Unsignalized,3,218,1000,7,0,0,1,0,0,0,,0,0.0,0,7,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,43,0,0,0,Enhance crosswalk (unsignalized),7,Raised Crosswalk,2,189,3455,5,0,0,1,0,0,0,,0,0.0,2,5,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 211 entries, a1_imp_agcy_city to matches
dtypes: category(1), datetime64[ns](2), float64(23), int64(97), object(88)
memory usage: 1.4+ MB


In [19]:
df.matches.value_counts()

both          882
left_only       0
right_only      0
Name: matches, dtype: int64

In [20]:
df.awarded_y.value_counts()

N    882
Name: awarded_y, dtype: int64

### Comparing merged df with cleaned data

In [21]:
#removing columns with agency staff information from cleaned df
columns_to_drop = [
    "a1_imp_agcy_contact",
    "a1_imp_agcy_email",
    "a1_imp_agcy_phone",
    "a1_proj_partner_contact",
    "a1_proj_partner_email",
    "a1_proj_partner_phone",
]

In [22]:
alldata = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllData",
    )
)

In [23]:
alldata = alldata.drop(columns=columns_to_drop)

In [24]:
alldata.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
257,N,CYCLE 5,7,,,,,Infrastructure - Medium,7-Los Angeles County-4,Los Nietos Pedestrian Access Improvements,Los Angeles,5953,Los Angeles County,900 South Fremont Avenue,Alhambra,91803,Civil Engineer,Yes,00307S (2006),07-5953RF15 (2015),No,,,57,,,38,,,32,,,Construction of pedestrian improvements includ...,Various roadways within the unincorporated Los...,Project is located within one of the ten large...,SCAG,Yes,3,33.98,-118.07,EXISTING CONDITIONS\rThe Los Nietos Pedestrian...,,,,No,,Yes,,Yes,,No,No,0,5,Yes,100,No,0,,0,Yes,No,0,0,Vision Zero Los Angeles County:A Plan for Safe...,Pedestrian safety and mobility enhancements in...,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,40,0,0,0,0,40,0,11700,0,,0,,0,17,0,37,0,0,5,0,0,170,,0,0,0,0,8,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


In [25]:
cleaned = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllDataFieldMapping Cleaned",
    )
)

In [26]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_contact,a1_imp_agcy_title,a1_imp_agcy_email,a1_imp_agcy_phone,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_contact,a1_proj_partner_title,a1_proj_partner_email,a1_proj_partner_phone,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,...,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,unnamed:_215,prog__amount__paed__1,prog__amount__pse__1,prog__amount__rw__1,prog__amount__con__1,prog__amount__con_ni__1,unnamed:_221,fund_year_1,prog__amount__paed__2,prog__amount__pse__2,prog__amount__rw__2,prog__amount__con__2,prog__amount__con_ni__2,unnamed:_228,fund_year_2,prog__amount__paed__3,prog__amount__pse__3,prog__amount__rw__3,prog__amount__con__3,prog__amount__con_ni__3,unnamed:_235,fund_year_3,prog__amount__paed__4,prog__amount__pse__4,prog__amount__rw__4,prog__amount__con__4,prog__amount__con_ni__4,unnamed:_242,fund_year_4,unnamed:_244,unnamed:_245,unnamed:_246,unnamed:_247,unnamed:_248,unnamed:_249,unnamed:_250,unnamed:_251,unnamed:_252
151,N,5,8,,,,,Infrastructure + NI - Medium,8-Riverside County-5,Grand Avenue Pedestrian and Bicycle Safety Imp...,RIV,5956,Riverside County,4080 Lemon Street,Riverside,92501,Dennis Acuna,County Traffic Engineer,dacuna@rivco.org,951-955-6800,Yes,00010S,08-05956R,Yes,City of Wildomar,"Daniel York, PE, PLS",Assistant City Manager,dyork@cityofwildomar.org,951-677-7751,67,67,,,42,42,,,28,28,,,"Grand Avenue Sidewalk, Curb Ramps and Bike Lan...",Grand Avenue in the Lakeland Village area of R...,Project is located within one of the ten large...,SCAG,No,0,33.63,-117.32,The proposed project is on Grand Ave in Lakela...,,,,No,,No,,No,,No,Yes,50,2,Yes,50,Yes,0,,0,No,No,0,0,City of Wildomar Mobility Plan - See Additiona...,"Construct 7,000 feet of concrete sidewalks, cu...",0,0,13000,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,...,0,0,0,0,0,0,0,N,0,N,0,Y,100,N,0,,0,N,,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,Y,N,Y,Y,N,N,,Spanish,Y,Y,Y,Y,N,N,,N,N,N,N,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [27]:
# filter_col = [col for col in cleaned if col.startswith('unnamed')]

In [28]:
# remove columns that have some agency specific information
cleaned = cleaned.drop(columns=columns_to_drop)

In [29]:
# remove columns that were manually entered - last 34 columns
cleaned = cleaned.drop(columns=(cleaned.iloc[:, 199:]))

In [30]:
# remove columns that are blank and unnamed
# cleaned=cleaned.drop(columns=filter_col)

In [31]:
# cleaned.columns.get_loc("original_prog__amt___pa_ed_")

In [32]:
# (cleaned.iloc[:, 199:].columns.tolist())

In [33]:
# making sure they are null
# (cleaned.iloc[1:, 199:]).info()

In [34]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
281,N,5,11,,,,,Infrastructure - Large,"11-Chula Vista, City of-1","F Street Promenade Phase I, from Bay Boulevard...",SD,5203,"Chula Vista, City of",Engineering Department - 276 Fourth Avenue,Chula Vista,91910,Principal Civil Engineer,Yes,00223S,11-5203R,No,,,80,80,,,51,51,,,40,40,,,Construct 12-foot wide multi-use path for pede...,East of San Diego Bay in northwestern Chula Vi...,Project is located within one of the ten large...,SANDAG,Yes,2,32.64,117.1,The project scope includes street and sidewalk...,,,,Yes,,No,,No,,No,Yes,50,6,Yes,50,Yes,0,,0,Yes,No,0,0,F Street Promenade Streetscape Master Plan (Ma...,Reducing the travel lanes to one lane and by a...,0,4000,500,0,0,,0,0,0,0,0,6,0,Wayfinding Signs,0,Pavement Marking,0,0,0,0,0,,0,0,0,0,,0,,0,0,20,100,0,0,2,1,4,3740,260,0,Wayfinding Signs,0,Elimination of wooden power poles,18,8,3740,4,0,0,4,0,8,84,Bottle and Pink Trumpet Trees,0,1,0,0,7,3740,No,Yes,Yes,Enhanced crosswalks,0,0,Bulb-outs,0,0,4000,0,1,0,1,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


### How do the merged and cleaned up columns match up?

In [35]:
df.columns.intersection(cleaned.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma',
       'a1_imp_agcy_name', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_proj_partner_agcy',
       'a1_proj_partner_exists',
       ...
       'a4_collab_non_profit', 'a4_collab_schools', 'a4_collab_pub_works',
       'a4_collab_other', 'a4_colab_other_descr', 'a4_plan_ped',
       'a4_plan_bike', 'a4_plan_atp', 'a4_plan_school_routes',
       'a4_row_open_street_demo'],
      dtype='object', length=191)

In [36]:
# columns in df not in cleaned
df.columns.difference(cleaned.columns)

Index(['a1_letter_of_intent', 'agency_app_num', 'app_fk', 'app_pk',
       'attch_addtl_attachments', 'attch_app_sig_page',
       'attch_conditions_photos', 'attch_conditions_project_map',
       'attch_engineeers_checklist', 'attch_exhibit22_plan',
       'attch_letters_of_support', 'attch_link', 'attch_ni_workplan',
       'attch_project_estimate', 'awarded_x', 'awarded_y',
       'completed_pdf_form', 'details_datetime_stamp', 'main_datetime_stamp',
       'matches'],
      dtype='object')

In [37]:
# columns in cleaned not in df
cleaned.columns.difference(df.columns)

Index(['#', 'assembly_district', 'atp_id', 'awarded', 'congressional_district',
       'ppno', 'ppno_1', 'senate_district'],
      dtype='object')

In [38]:
len(cleaned)

454

## Assembly Congressional and Senate Districts

In [39]:
ad = (
    cleaned
    >> select(
        _.a1_imp_agcy_name,
        _.assembly_district,
        _.a2_assem_dist_a,
        _.a2_assem_dist_b,
        _.a2_assem_dist_c,
        # _.congressional_district,
        # _.a2_congress_dist_a,
        # _.a2_congress_dist_b,
        # _.a2_congress_dist_c,
        # _.senate_district,
        # _.a2_senate_dist_a,
        # _.a2_senate_dist_b,
        # _.a2_senatedistc,
    )
)

In [40]:
ad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   a1_imp_agcy_name   454 non-null    object 
 1   assembly_district  454 non-null    object 
 2   a2_assem_dist_a    454 non-null    int64  
 3   a2_assem_dist_b    62 non-null     float64
 4   a2_assem_dist_c    29 non-null     float64
dtypes: float64(2), int64(1), object(2)
memory usage: 17.9+ KB


### Unsuccessful Methods

In [43]:
## Need to join two columns together if they have values

In [44]:
## code help: https://stackoverflow.com/questions/52889130/how-to-remove-zeros-after-decimal-from-string-remove-all-zero-after-dot
#ad["a2_assem_dist_b"].map("{0:g}".format)

In [45]:
##code help:
# https://stackoverflow.com/questions/49091259/pandas-looping-through-rows-and-skipping-over-rows
# https://stackoverflow.com/questions/36774602/concatenate-two-numerical-values-to-make-a-new-column-using-pandas

In [46]:
# ## Code help: https://stackoverflow.com/questions/56119307/pandas-conditionally-concat-two-columns
# mask = (ad["a2_assem_dist_b"] < 10).fillna(False)

# ad["assem_dist_combined2"] = ad.loc[mask, "a2_assem_dist_b"].map("{0:g}".format) + ad[
#     "a2_assem_dist_c"
# ].map("{0:g}".format)

In [47]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         ad["a2_assem_dist_b"], ad["assem_dist_combined2"])

In [48]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         (ad["a2_assem_dist_b"].map("{0:g}".format) + ', ' + ad["a2_assem_dist_c"].map("{0:g}".format)),
#          ad["assem_dist_combined2"])

In [49]:
## another attempt:

## this will combine all 
## code help: https://stackoverflow.com/questions/55526620/how-to-combine-non-null-entries-of-columns-of-a-dataframe-into-a-new-column
#df["assem_dist_combined3"] = df.agg(lambda x: x.dropna().str.cat(sep=','), axis=1)

## this combines set columns
## code help: https://stackoverflow.com/questions/45787782/combine-multiple-columns-in-pandas-excluding-nans
# cols = ['a2_assem_dist_b', 'a2_assem_dist_c']
# ad["assem_dist_combined3"] = ad[cols].agg(lambda x: x.dropna().tolist(), axis=1)


### Function

Requirements for function:
* when `a2_assem_dist_a` == 0 AND `a2_assem_dist_b` & `assem_dist_c` are less than 10, **then combine of `2_assem_dist_b` & `assem_dist_c` to one number.**
* when `a2_assem_dist_a` is less than 10 AND `a2_assem_dist_b` is less than 10 AND `assem_dist_c` is null, **then take combine `2_assem_dist_a` & `assem_dist_b` (can be one number or two)**
* when `a2_assem_dist_a` == 1 AND `a2_assem_dist_b` is less than 10, **then combine `a2_assem_dist_a` & `2_assem_dist_b` with a comma**
* when `a2_assem_dist_a` is notnull AND `a2_assem_dist_b` & `assem_dist_c` are null,** then `ssembly_district`== `2_assem_dist_a`**
* when `a2_assem_dist_a` & `a2_assem_dist_b` is >= 10 AND `assem_dist_c` is null, **then take combination of `2_assem_dist_a` & `assem_dist_b` with a comma**
* when `a2_assem_dist_a` & `a2_assem_dist_c` is >= 10 AND `assem_dist_b` is null, **then take combination of `2_assem_dist_a` & `assem_dist_c` with a comma**


In [50]:
def format_districts(df, col_a, col_b, col_c, new_col):
    
    #rename columns to alias
    df = df.rename(columns = {col_a:'a',
                              col_b:'b',
                              col_c:'c'})
    #fix types
    df = df.astype({'a':'Int64',
                    'b':'Int64',
                    'c':'Int64'})
    
    #replace null values with numeric
    df["a"].fillna(9999999, inplace=True)
    df["b"].fillna(9999999, inplace=True)
    df["c"].fillna(9999999, inplace=True)
    
    def district_status(row):
        if (row.a == 0) and (row.b < 10) and (row.c < 10):
            return (str(row["b"])) + (str(row["c"]))
        
        elif (row.a < 10) and (row.b < 10) and not (row.c == 9999999):
            return (str(row["a"])) + (str(row["b"]))
        
        elif (row.a>=1) and (row.b == 9999999) and (row.c == 9999999):
            return (row["a"])
        
        elif (row.a >= 10) and (row.b>= 10) and (row.c == 9999999):
            return (str(row["a"])) + ', ' + (str(row["b"]))
        
        elif (row.a >= 10) and (row.b == 9999999)  and (row.c >= 10):
            return  (str(row["a"])) + ', ' + (str(row["c"]))
        
        elif (row.a >= 1) and (row.b == 0) and (row.c == 0):
            return  (str(row["a"])) 
        
        elif (row.a >= 1) and not (row.b == 0) and not (row.b == 9999999) and not (row.c == 9999999):
            return  (str(row["a"])) + ', ' + (str(row["b"])) + ', ' + (str(row["c"]))
        
        else:
            return "Needs Manual Assistance"
    
    #apply function
    df[new_col] = df.apply(lambda x: district_status(x), axis=1)
    
    #replace values back to null
    df = df.replace({'a': 9999999, 'b': 9999999, 'c':9999999}, np.nan)
    
    #rename columns back to original
    df = df.rename(columns = {'a':col_a,
                              'b':col_b,
                              'c':col_c})
  
    return df
    

In [51]:
## test on the subsetted df 
## still using cleaned df
(format_districts(ad, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district2")).sample(20)

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district2
239,Sacramento County,8,8,,,8
71,Nevada County Transportation Commission,3,3,,,3
130,"Visalia, City of",26,26,,,26
352,Shasta County,1,1,,,1
302,San Diego County,79,79,,,79
189,"Manteca, City of",12,12,,,12
110,"Lathrop,City of",12,12,,,12
283,"Solana Beach, City of",74,74,,,74
264,"Pasadena, City of",41,41,,,41
63,San Francisco Municipal Transportation Agency,17,17,,,17


In [52]:
## check which ones dont fit the arguments 
## still using cleaned df

(format_districts(ad, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district2"))>>filter(_.assembly_district2=="Needs Manual Assistance")

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district2
6,Butte County,"1, 3",1,3,,Needs Manual Assistance
12,"San Bernardio, City of",40,4,0,,Needs Manual Assistance
276,Yolo County,"4, 7",4,7,,Needs Manual Assistance
365,"Lynwood, City of","6, 3",6,3,,Needs Manual Assistance
380,"Vallejo, City of","14, 4",14,4,,Needs Manual Assistance


In [53]:
## test on main df with assembly districts 
df = ((format_districts(df, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district"))
 )
(df>>select(_.a2_assem_dist_a, _.a2_assem_dist_b, _.a2_assem_dist_c, _.assembly_district)).sample(20)

Unnamed: 0,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district
405,2,,,2
796,43,,,43
725,13,,,13
863,18,,,18
590,26,,,26
53,26,,,26
512,74,,,74
741,7,,,7
183,8,,,8
431,77,,,77


In [54]:
## apply function for the other two columns: congressional district and senate district

df = (format_districts(df, "a2_congress_dist_a", "a2_congress_dist_b", "a2_congress_dist_c", "congressional_district"))
df = (format_districts(df, "a2_senate_dist_a", "a2_senate_dist_b", "a2_senatedistc", "senate_district"))

In [55]:
df.sample(5)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
71,Nevada City,,No,Nevada County Transportation Commission,,"101 Providence Mine Road, Suite 102",Executive Director,95959,A1 City of Grass Valley Partner Agency Letter.pdf,City of Grass Valley,Yes,Assistant City Engineer,3,,,1,,,Nevada,3,Construct new oblong roundabout with high-visi...,"The interchange of SR 174 and SR 49/20, includ...",SR 174/49/20 Roundabout and Active Transportat...,Project is located outside one of the ten larg...,Caltrans,No,0,39.22,-121.06,This project improves safety and comfort for p...,,Nevada CTC,1,,,,Yes,,Yes,,Yes,,No,Infrastructure - Medium,Yes,55,3,Yes,45,No,0,,0,No,1,1992,CK Additional Attachments.pdf,CA Signature Page.pdf,CE Photos of Existing Conditions.pdf,CD Project Plans.pdf,CB Engineer's Checklist.pdf,,CI Letters of Support.pdf,,,CF Project Estimate.pdf,,2020-09-14 06:53:06,3-Nevada County Transportation Commission-1,CYCLE 5,N,6144,0,0,Grass Valley Street System Master Plan; Nevada...,Construction of a new roundabout with enhanced...,Yes,0,1362,0,0,0,,0,0,0,0,0,0,0,Fencing (Cl. I separation) - LF,640,,0,0,0,0,5,,0,0,0,0,,0,,0,0,0,0,2,2,0,2,0,0,183,0,,0,,0,0,0,3,0,3,0,0,0,0,,0,0.0,1,1,0,0,No,No,Yes,,0,0,,0,0,0,0,1,0,1,1992,2020-09-14 06:53:06,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,3,1,1
530,Jurupa Valley,08-5487-S21,Yes,"Jurupa Valley, City of",08-5487-S21,8930 Limonite Avenue,Assistant City Engineer,92509,,,No,,60,,,41,,,Riverside,8,The Jurupa Valley Granite Hill SRTS gap closur...,"In the City of Jurupa Valley, on various stree...",Jurupa Valley Granite Hill Area SRTS Sidewalk ...,Project is located within one of the ten large...,SCAG,No,0,,,The City of Jurupa Valley has identified corri...,,,31,,,,Yes,,Yes,,Yes,,Yes,Infrastructure - Medium,No,0,2,Yes,100,Yes,0,,0,No,2,3459,Attachment-K-Jurupa_GraniteHill_AddtlDocs.pdf,Attachment-A-Signature-Page_JV_GH.pdf,Attachment-E-GH-Existing Conditions Photos.pdf,Attachment-D-GraniteHill_ProjectLayout.pdf,Attachment-B-Engr-Checklist-Granite Hill.pdf,,Attachment-I-Jurupa_GraniteHill_LettersOfSuppo...,,,Attachment-F-Project Estimate-Granite Hill.xlsx,,2022-06-14 23:50:13,"8-Jurupa Valley, City of-2",CYCLE 6,N,5487,0,0,,"15,800 linear feet of new sidewalk, 23 crosswa...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,24,0,15800,0,New/Enhanced x-walks at AWSC ints,19,,0,0,0,4,0,0,0,0,0,0,,0,,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,3459,2022-06-14 23:50:13,N,0,N,0,N,0,N,0,,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,60,41,31
173,Tehachapi,09-5184,Yes,"Tehachapi, City of",00415S,115 S. Robinson Street,Development Services Director,93561,,,No,,34,,,23,,,Kern,9,"Install sidewalk, curb, gutter, curb ramps, im...",On the north side of West Valley Boulevard bet...,Valley Boulevard and Mill Street Gap Closure P...,Project is located within one of the ten large...,KCOG,Yes,3,35.12,-118.45,This 0.5 mile stretch of Valley Boulevard serv...,,,18,,,,Yes,,Yes,,No,,No,Infrastructure - Medium,Yes,20,0,Yes,80,No,0,,0,No,2,2462,,Attachment A 091420.pdf,"Attachment E, photos.pdf",Attachment D.pdf,Attachment B.pdf,,Attachment I.pdf,,Attachment F -Eng Est.pdf,Attachment F -Eng Est.pdf,,2020-10-01 10:25:53,"9-Tehachapi, City of-2",CYCLE 5,N,5184,No,No,,"Construction of 3,520 LF of sidewalk, 15 new A...",Yes,0,0,1130,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,94,0,3,950,0,15,0,2744,0,,0,,0,4,776,0,0,0,0,0,0,0,,0,0.0,0,0,4,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,2462,2020-10-01 10:25:53,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,34,23,18
832,South Gate,07-5257F15,Yes,"South Gate, City of",00336S,8650 California Avenue,Senior Civil Engineer,90280,,,No,,63,,,44,,,Los Angeles,7,Install capital safety improvements on Tweedy ...,"The 3-mile Tweedy Boulevard corridor, with imp...","Tweedy Boulevard Complete Streets, Phase II",Project is located within one of the ten large...,SCAG,Yes,4,,,\rThe Tweedy Boulevard Complete Streets Phase ...,,,33,,,,No,,Yes,,No,,Yes,Infrastructure - Medium,Yes,10,10,Yes,90,Yes,0,,0,No,1,3839,"Additional DAC Maps_Health, Mobility, Socioeco...",Final Part C- Attachment A- Signature Page for...,PHOTOS OF EXISTING CONDITIONS_Tweedy_0615.pdf,Tweedy_Recommendations_0615.pdf,EngineersChecklist_Tweedy.pdf,,All_Tweedy.pdf,,,20220614 Tweedy ATCP Cycle - JL.xlsx,,2022-06-15 21:19:05,"7-South Gate, City of-1",CYCLE 6,N,5257,0,0,Tweedy Mile Specific Plan,"13 curb extensions, 53 curb ramps, 26 continen...",No,0,0,0,32260,0,,0,0,0,0,0,13,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,13,0,0,0,0,0,0,0,0,0,Bus Shelter/Leaning Rail,16,Curb Extensions/Bulbouts,13,53,0,0,26,0,13,6,13,0,,0,,0,0,3,0,Yes,No,No,Leading Pedestrian Intervals,0,6,,0,0,0,0,2,0,0,3839,2022-06-15 21:19:05,N,0,N,0,N,0,N,0,,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,63,44,33
632,Concord,04-5135,Yes,"Concord, City of",00403S,"1950 Parkside Drive, 1B",Deputy Director of Public Works - Transportation,94519,LTR Funding Recommendation-Pine Hollow Road fo...,City of Clayton,Yes,City Manager,14,,,11,,,Contra Costa,4,"Install 1.5 miles of cycle track, 3 RRFBs, 8 s...",The project is a 2.0-mile corridor located on ...,Pine Hollow Road Complete Streets Project,Project is located within one of the ten large...,MTC,No,0,,,The Pine Hollow Complete Streets Project (“pro...,,,7,,,,Yes,,Yes,,Yes,,No,Infrastructure - Medium,Yes,20,5,Yes,80,Yes,0,,0,No,2,3705,,Signature Page ATP Grant with Concord Signed 0...,Pine Hollow ATP site photos.pdf,2022.06.15 Pine Hollow Road ATP GRANT plans an...,eng checklist.pdf,,Pine Hollow ATP support letters.pdf,,,Attachment-F-Project-Estimate_Pine Hollow.pdf,,2022-06-15 17:51:24,"4-Concord, City of-2",CYCLE 6,N,5135,No,Yes,"Bicycle, Pedestrian, & Safe Routes to Transit ...","Install 1.5 miles of cycle track, 3 RRFBs, 8 s...",Yes,6,0,0,2640,7920,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,20,0,31,0,0,76,0,1700,0,Raised intersections,2,,0,0,1150,0,5,0,0,1,0,0,,0,,3,0,0,10,Yes,No,No,Bulbounds and tighten radius,8,7,,0,0,3170,0,0,0,0,3705,2022-06-15 16:54:28,N,0,N,0,N,0,N,0,,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,14,11,7


## Change 0 values to Null

In [56]:
df_zero = df.loc[:, df.eq(0).any()]

# df[, 12:18][df[, 12:18] == 0] <- NA



In [57]:
df_zero_list = df_zero.columns.to_list()

In [58]:
## note: we might want to take out the assembly, congress and senate districts from this list

In [59]:
df_zero_list

['a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_past_proj_qty',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_pct',
 'a3_trail_elig_cost',
 'a3_trail_trans_pct',
 'agency_app_num',
 'b_sig_inter_new_bike_boxes',
 'b_class_1',
 'b_class_2',
 'b_class_3',
 'b_class_4',
 'b_light_intersection',
 'b_mid_block_new_rrfb_signal',
 'b_mid_block_surf_improv',
 'b_bsp_new_bikes',
 'b_bike_new_secured_lockers',
 'b_bike_new_racks',
 'b_bsp_new_station',
 'b_other_bike_improv_qty_1',
 'b_other_bike_improv_qty_2',
 'b_light_rdwy_seg',
 'b_sig_inter_timing_improv',
 'b_un_sig_new_rrfb_signal',
 'b_un_sig_cross_surf_improv',
 'm_cls_1_trails_widen_recon_exist',
 'm_cls_1_trails_new__less_than_9',
 'm_cls_1_trails_new_over_9',
 'm_non_cls_trails_new',
 'm_other_trail_improv_qty_1',
 'm_other_trail_improv_qty_2',
 'm_non_cls_widen_recon_exist',
 'p

In [60]:
#df[df_zero_list] = df[df_zero_list].replace({'0':np.nan, 0:np.nan})

In [64]:
def convert_zeros_to_nan(df):
    df_zero = df.loc[:, df.eq(0).any()]
    df_zero.drop(['a2_assem_dist_b','a2_assem_dist_c', 'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_senate_dist_b', 'a2_senatedistc',
              'a2_past_proj_qty', 'a3_st_num_schools', 'agency_app_num',
             'a3_st_ped_pct', 'a3_trail_trans_pct', 'a4_ped_gap_pct',  'a4_reg_init_pct', 'a4_com_init_pct',
              'a4_safe_route_pct', 'a4_fl_mile_pct', 'a4_emp_based_pct', 'a4_other_ni_pct'
             ], axis=1, inplace=True)
    df_zero_list = df_zero.columns.to_list()
    df[df_zero_list] = df[df_zero_list].replace({'0':np.nan, 0:np.nan})
    
    return df

In [65]:
df = convert_zeros_to_nan(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [66]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20.0,1,Yes,80,Yes,,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,1500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,6.0,,1500.0,,,,,,5.0,,4.0,3.0,,,,,,,,,,,,,No,No,Yes,,,,,,,,,,,,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,21,16,12
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50.0,0,Yes,50,No,,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,6336.0,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,100,,,,,,,,,,,,,38.0,,15.0,16.0,,18.0,3.0,,,,1.0,6.0,,18.0,,,Yes,No,No,,,,,,,8800.0,,,,,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50.0,2,Yes,50,No,,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,13752.0,5748.0,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,40,2.0,,,,20.0,,,,,,,,9.0,,,,,,,,,,,,,,,,Yes,No,No,,,,,,,,,,,,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,22,14,13
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,60.0,,,,Left Turn Arrow,3.0,Enhanced Crosswalk Unsignalized,3.0,218.0,1000.0,7.0,,,1.0,,,,,,,,7.0,,,Yes,No,No,,,,,,,,,,,,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,43.0,,,,Enhance crosswalk (unsignalized),7.0,Raised Crosswalk,2.0,189.0,3455.0,5.0,,,1.0,,,,,,,2.0,5.0,,,Yes,No,No,,,,,,,,,,,,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34


## Null columns

In [327]:
# finding columns with all null and dropping for now. will keep out of script

In [67]:
alldatanull = alldata.columns[alldata.isna().all()].tolist()

In [68]:
alldatanull

['#',
 'atp_id',
 'ppno',
 'ppno_1',
 'a2_project_location_map',
 'a3_plan_active_trans',
 'a3_plan_bicycle',
 'a3_plan_ped',
 'a3_plan_srts',
 'a4_bike_gap_pct',
 'a4_easement_support',
 'a4_emp_based']

In [69]:
alldata = alldata.drop(columns=alldatanull)

In [70]:
dfnull = df.columns[df.isna().all()].tolist()
df = df.drop(columns=dfnull)

In [71]:
dfnull

['a2_project_location_map',
 'a3_plan_active_trans',
 'a3_plan_bicycle',
 'a3_plan_ped',
 'a3_plan_srts',
 'a3_trail_elig_cost',
 'attch_exhibit22_plan',
 'attch_link',
 'completed_pdf_form',
 'a4_bike_gap_pct',
 'a4_easement_support',
 'a4_emp_based',
 'a4_le_methods']

In [72]:
cleanednull = cleaned.columns[cleaned.isna().all()].tolist()
cleaned = cleaned.drop(columns=cleanednull)

## Changing Column Types

In [73]:
df.a2_mpo.value_counts()

SCAG        315
MTC         132
SANDAG       62
Caltrans     52
SACOG        52
SJCOG        38
TCAG         37
AMBAG        34
KCOG         31
COFCG        31
SBCAG        19
SLOCOG       15
BCAG         14
TMPO         13
SRTA         11
StanCOG      11
MCTC          7
KCAG          3
MCAG          3
CVAG          2
Name: a2_mpo, dtype: int64

In [74]:
df.details_datetime_stamp.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: details_datetime_stamp
Non-Null Count  Dtype         
--------------  -----         
882 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 13.8 KB


In [75]:
compare_col = np.where(
    df["main_datetime_stamp"] == df["details_datetime_stamp"], True, False
)
df["compare_datetime"] = compare_col
df.compare_datetime.value_counts()

True     763
False    119
Name: compare_datetime, dtype: int64

In [76]:
# some datetimes are seconds different others are larger
(
    df
    >> filter(_.compare_datetime == False)
    >> select(_.details_datetime_stamp, _.main_datetime_stamp)
)

Unnamed: 0,details_datetime_stamp,main_datetime_stamp
27,2020-09-10 16:23:42,2020-09-10 16:23:41
37,2020-09-11 12:48:11,2020-09-11 12:48:10
55,2020-09-10 14:14:03,2020-09-10 14:14:02
67,2020-09-14 19:20:56,2020-09-14 19:20:55
75,2020-09-11 16:29:11,2020-09-11 16:29:10
...,...,...
869,2022-06-16 12:10:18,2022-06-21 11:28:23
871,2022-06-16 12:12:24,2022-06-16 12:12:23
872,2022-06-16 12:15:24,2022-06-16 12:15:23
874,2022-06-16 10:57:37,2022-06-16 10:57:36


In [77]:
df = df.drop(columns="compare_datetime")

In [78]:
df.sample(2)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
268,Covina,,Yes,"Covina, City of",00202S,125 E. College St,City Engineer,91723,,,No,,48,,,32,,,Los Angeles,7,"At Covina HS:Install in-road light systems, RR...",Improvements around Covina High School at Puen...,Covina High School Pedestrian Improvements,Project is located within one of the ten large...,SCAG,Yes,1,34.08,-117.9,The proposed project will implement pedestrian...,,22,,,No,Yes,No,No,Infrastructure - Small,No,,3,Yes,100,Yes,,0,No,1,2315,Attachment K - Additional Attachments.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project Plans.pdf,Attachment B - Engineer's Checklist.pdf,Attachment I - Letters of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Project Estimate.pdf,2020-09-15 13:07:46,"7-Covina, City of-1",CYCLE 5,N,5118,0,0,,Refresh 12 existing yellow high visibility cro...,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,Pedestrian Warning Signs,12.0,In-Road Light Systems,3.0,,,8.0,,16.0,,2.0,,,,,,3.0,,3.0,,Yes,No,No,Protected Left-Turn Phasing,,2.0,Leading Pedestrian Interval,2.0,,,,,,,2315,2020-09-15 13:07:46,N,0,N,0,N,0,N,0,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,48,32,22
868,Fresno,06-5942R,Yes,Fresno County,00324S,"2220 Tulare Street, 6th Floor",Design Engineer,93720,,,No,,31,,,21,,,Fresno,6,Del Rey Sidewalk Project - Pedestrian improvem...,"Various locations in Del Rey, CA.",Del Rey Sidewalk Project,Project is located outside one of the large MP...,COFCG,No,0,,,Project Description:\rLocated within the disad...,,14,,,Yes,Yes,No,No,Infrastructure - Small,No,,1,Yes,100,Yes,,0,No,1,3870,,Attachment-A-Signature-Page - signed.pdf,ATTACHMENT E - PHOTOS-DEL REY.pdf,ATTACHMENT D - PRELIMINARY PLANS.pdf,Del Rey Attachment-B-Engr-Checklist stamp.pdf,LETTERS OF SUPPORT.pdf,,ATTACHMENT F - PROJECT ESTIMATE-DEL REY.pdf,2022-06-16 12:07:18,6-Fresno County-1,CYCLE 6,N,5942,0,0,Regional Transportation Plan,"Construct 14,200 linear feet concrete sidewalk...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,35.0,,11200.0,,,,,,9.0,3000.0,,,,,,,,,,,,,4.0,,No,No,Yes,,,,,,,,,,,,3870,2022-06-16 12:07:18,N,0,N,0,N,0,N,0,,N,,0,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,31,21,14


In [79]:
df.columns.tolist()

['a1_imp_agcy_city',
 'a1_imp_agcy_fed_ma_num',
 'a1_imp_agcy_ma',
 'a1_imp_agcy_name',
 'a1_imp_agcy_state_ma_num',
 'a1_imp_agcy_street',
 'a1_imp_agcy_title',
 'a1_imp_agcy_zip',
 'a1_letter_of_intent',
 'a1_proj_partner_agcy',
 'a1_proj_partner_exists',
 'a1_proj_partner_title',
 'a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_county',
 'a2_ct_dist',
 'a2_info_proj_descr',
 'a2_info_proj_loc',
 'a2_info_proj_name',
 'a2_mop_uza_population',
 'a2_mpo',
 'a2_past_proj',
 'a2_past_proj_qty',
 'a2_proj_lat',
 'a2_proj_long',
 'a2_proj_scope_summary',
 'a2_rtpa',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_plan_active_trans_exists',
 'a3_plan_bicycle_exists',
 'a3_plan_ped_exists',
 'a3_plan_srts_exists',
 'a3_proj_type',
 'a3_st_bicycle_applies',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_applies',
 'a3_st_ped_pct',
 'a3_st_srts',
 'a3_trail_fed_funding',
 'a3_trail_t

### Add Geometry

In [80]:
from dla_utils import _dla_utils
from shared_utils import geography_utils

In [81]:
gdf = geography_utils.create_point_geometry(
    df, longitude_col="a2_proj_long", latitude_col="a2_proj_lat"
)

In [82]:
gdf.sample(1)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district,geometry
778,Reedley,06-5216F15,Yes,"Reedley, City of",-,1733 Ninth Street,City Engineer,93654,,,No,,31,,,21,,,Fresno,6,"Construct Class 1 Bikeway, concrete sidewalk, ...",Dinuba Avenue; intersection of Dinuba and Zumw...,Reedley Parkway Phase VI,Project is located outside one of the large MP...,COFCG,Yes,1,,,The project will transform an incomplete and u...,,14,,,No,Yes,Yes,No,Infrastructure - Small,No,,0,Yes,100,No,No,0,Yes,1,3549,,Part C Attachment A Signature Page.pdf,Project Area Photos with notes - ATP Cycle 6 -...,Part C Attachment D Project Layout.pdf,Part C Attachment B Eng Checklist.pdf,Part C Attachment I Letters of Support.pdf,,Part C Attachment F Project Estimate.pdf,2022-06-15 12:04:43,"6-Reedley, City of-1",CYCLE 6,N,5216,0,0,Fresno County Regional Active Transportation Plan,"Construction of 2,840 linear feet of Class 1 B...",Yes,,,,,,,,,,,,,,,,,,,,,,2840.0,,,Trail Lighting,19.0,,,,,0,,1.0,,,2.0,,1680.0,,,,,,,,,3.0,,,,,470.0,,,,,,,,No,No,Yes,,,,,,,,,,,,3549,2022-06-15 12:04:43,N,0,N,0,N,0,N,0,,N,,0,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,31,21,14,POINT EMPTY


### Change columns to integers

In [83]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x

In [84]:
columns_to_int = [
    "a1_locode",
    # "a2_senatedistc",
    # "a2_senate_dist_b",
    # "a2_assem_dist_b",
    # "a2_assem_dist_c",
    # "a2_congress_dist_b",
    # "a2_congress_dist_c",
    # "a2_proj_lat",
    # "a2_proj_long",
    # "a2_senate_dist_b",
    # "a2_senatedistc",
    "p_un_sig_inter_new_roundabout",
 #   "a4_emp_based_pct",
#    "a4_le_methods",
    "a4_srts_le",
    "a1_locode",
    "a2_senatedistc",
    "a2_senate_dist_b",
]

In [85]:
# gdf[columns_to_int] = gdf[columns_to_int].apply(get_num)

In [86]:
for col in columns_to_int:
    gdf[col] = gdf[col].apply(get_num)

In [87]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 202 entries, a1_imp_agcy_city to geometry
dtypes: Int64(7), category(1), datetime64[ns](2), float64(83), geometry(1), int64(15), object(93)
memory usage: 1.4+ MB


In [88]:
gdf.select_dtypes("int64")

Unnamed: 0,a1_imp_agcy_zip,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_ct_dist,a2_past_proj_qty,a2_senate_dist_a,a3_st_num_schools,a3_st_ped_pct,a3_trail_trans_pct,agency_app_num,app_pk,a4_ped_gap_pct,app_fk,a4_reg_init_pct,a4_com_init_pct,a4_safe_route_pct,a4_fl_mile_pct,a4_other_ni_pct
0,95340,21,,,16,,,10,0,12,1,80,0,1,1802,0,1802,0,0,0,0,0
1,92702,69,,,46,,,12,2,34,0,50,0,4,1811,100,1811,0,0,0,0,0
2,94044,22,,,14,,,4,0,13,2,50,0,1,1804,40,1804,0,0,0,0,0
3,92702,69,,,46,,,12,2,34,5,100,0,13,1822,50,1822,0,0,0,0,0
4,92702,69,,,46,,,12,4,34,5,100,0,14,1823,50,1823,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,91362,,4,4,,2,6,7,0,,0,5,0,1,3192,5,3192,0,0,0,0,0
878,91733,49,,,32,,,7,5,22,4,25,0,1,3859,0,3859,0,0,0,0,0
879,95113,25,,,19,,,4,1,15,0,40,0,3,3860,2,3860,0,0,0,0,0
880,93101,,3,7,,2,4,5,2,,1,75,0,2,3845,20,3845,50,0,50,0,0


In [89]:
gdf.select_dtypes("object")

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_ped_applies,a3_st_srts,a3_trail_fed_funding,a3_trails,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_other_bike_improv_1,b_other_bike_improv_2,m_other_trail_imprv_1,m_other_trail_imprv_2,p_other_ped_imprv_1,p_other_ped_imprv_2,p_amenities_shade_tree_type,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_other_traffic_calming_imprv_2,a4_reg_init,a4_com_init,a4_safe_route,a4_fl_mile,a4_other_ni,a4_other_ni_descr,a4_act_other_1_descr,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,assembly_district,congressional_district,senate_district
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,,,No,,Merced,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,The Planada Sidewalk Infill Project is located...,,,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,Yes,,No,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,,,,,No,No,Yes,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,21,16,12
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,This project will implement a Class 3 bicycle ...,,,,Yes,Yes,No,Yes,Infrastructure - Medium,Yes,Yes,No,,No,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,,,No,,San Mateo,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,The project will install a combination of Clas...,,,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,No,,No,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,22,14,13
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,,,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,Left Turn Arrow,Enhanced Crosswalk Unsignalized,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,,,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,Enhance crosswalk (unsignalized),Raised Crosswalk,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Thousand Oaks,07-5292F15,Yes,"Thousand Oaks, City of",00252,2100 Thousand Oaks Blvd.,Transportation Planner,,,No,,Ventura,"Construction funding for Class IV bikelanes, ...","In the City of Thousand Oaks, Lynn Road betwee...",Lynn Road Bike Lanes and Pedestrain Improvements,Project is located within one of the ten large...,SCAG,No,The project is located on 4.5-miles of Lynn R...,,2,7,Yes,No,No,No,Infrastructure - Small,Yes,Yes,No,,No,ATTACHMENT K.pdf,Attachement A Signed.pdf,photoskn.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,"7-Thousand Oaks, City of-1",CYCLE 6,N,5392,No,Yes,Local Road Safety Plan,"265' new sidewalk, 2 rapid flashing beacons, 1...",Yes,,,,,,,,No,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,44,26,27
878,South El Monte,,Yes,"South El Monte, City of",07-5352S21,1415 Santa Anita Avenue,Community Development Director,Partner Agency Letter of Intent.pdf,City of El Monte,Yes,City Engineer,Los Angeles,Construct Class II bike lane segments; install...,Merced Avenue from Garvey Avenue to Fern Stree...,Merced Avenue Greenway,Project is located within one of the ten large...,SCAG,Yes,The project will implement bicyclist/pedestria...,,,,No,Yes,No,No,Infrastructure - Small,Yes,Yes,No,,No,Att K - Support Docs.pdf,Att A - Signature Page.pdf,Att E - Photos of Existing Conditions.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,"7-South El Monte, City of-1",CYCLE 6,N,5352,0,0,,Construct 0.97-mile Class II bike path; 4 enha...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,49,32,22
879,San Jose,04-5005F15,Yes,"San Jose, City of",00200S,200 E Santa Clara St,Senior Transportation Specialist,,,No,,Santa Clara,This project will decouple 2nd and 3rd street ...,The project is in SoFA arts district in southw...,2nd & 3rd Street De-Coupling and Complete Stre...,Project is located within one of the ten large...,MTC,Yes,"The City of San José, through its Downtown Tra...",,,,No,Yes,No,No,Infrastructure - Large,Yes,Yes,No,,No,attachment k.pdf,Attachment-A-Signature-Page (1)_jr (1).pdf,Attachment_G_Site_Photos.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,"4-San Jose, City of-3",CYCLE 6,N,5005,0,0,"Emerging mobility Action Plan, Carbon Neutral ...",Project constructs approximately 6840 feet of ...,Yes,Bike Ramps,Raised Intersections,,,Fully Bulbed (all 4 corners),,,Yes,No,No,Conversion of 1 to 2 way operation,<---- 0.68 miles,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,25,19,15
880,Santa Barbara,05-5951R,Yes,Santa Barbara County,00100S,123 E. Anapuma St,Alternative Transportation Manager,,,No,,Santa Barbara,"Curb extensions, sidewalks and crosswalks for ...",Unincorporated neighborhood located south of E...,Isla Vista Bike and Pedestrian Improvements Pr...,Project is located outside one of the large MP...,SBCAG,Yes,"Isla Vista is a place like no other. 15,733 pe...",,1,9,Yes,No,No,No,Infrastructure + NI - Medium,Yes,Yes,Yes,,No,,Attachment A_Signature Page - 2022.pdf,Existing Conditions Photos.pdf,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5-Santa Barbara County-2,CYCLE 6,N,5951,0,0,Regional Transportation Plan,"Curb extensions, sidewalks, and bicycle networ...",Yes,Bike left-hand turn lanes,Class 2 conflict / intersection striping,,,,,,No,No,Yes,,,Y,N,Y,N,N,,,,N,N,Y,Y,N,N,,"Spanish, Mandarin",Y,N,Y,N,N,N,,N,N,N,N,No,N,37,24,19


In [90]:
gdf.select_dtypes("float64")

Unnamed: 0,a2_proj_lat,a2_proj_long,a3_st_bicycle_pct,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_qty_1,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_improv_qty_1,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_qty_1,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_2
0,37.29,120.31,20.00,,,,1500.00,,,,,,,,,,,,,,,,,,,,,,,,,,,6.00,,1500.00,,,,5.00,,4.00,3.00,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
1,33.74,117.86,50.00,,,,6336.00,,,,,,,,,,,,2.00,,,,,,,,,,,,,,,,,,,,,38.00,,15.00,16.00,,18.00,3.00,,,1.00,6.00,,18.00,,,,,,,8800.00,,,,,0.00,,,,,,,,,,,,,,,,,,,
2,37.65,-122.49,50.00,,,13752.00,5748.00,,,,,,,,,,,,,1.00,,,,,,,,,,2.00,,,,20.00,,,,,,9.00,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
3,33.71,117.89,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,60.00,,,,3.00,3.00,218.00,1000.00,7.00,,,1.00,,,,,,,7.00,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
4,33.73,117.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43.00,,,,7.00,2.00,189.00,3455.00,5.00,,,1.00,,,,,,2.00,5.00,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,,,95.00,,,,,24820.00,,,,,,,,,,324.00,2.00,2.00,,,,,,,,,,,,,,,,265.00,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
878,,,75.00,,,5100.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.00,23000.00,4.00,,16.00,,,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
879,,,60.00,9.00,,,,6840.00,,,,,,,,8.00,6.00,,,,4.00,,,,,,,,,,,,,,,,,60.00,,48.00,6840.00,8.00,,,2.00,,,,,,,4.00,4.00,6840.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
880,,,25.00,2.00,,1700.00,22410.00,,,,,,,,,10.00,24.00,,,,,,,,,,,,,1.00,,,1.00,110.00,,2820.00,,,,23.00,,,,,,,,,,,,42.00,17.00,2700.00,,,,,,,,,,,2.00,4.00,4.00,,,,,,4.00,2.00,,,,,,,,,


#### Columns to add/change:
* ~geometry column for lat long~
* ~agg senate_dist~
* ~agg congressional_dist~
* ~agg assemb_dist~
* a1_locode to int64
* ~change 0s values in column to null~


#### Columns to maybe add
* a2_county acronym
*

In [91]:
df >> select(_.a2_county)

Unnamed: 0,a2_county
0,Merced
1,Orange
2,San Mateo
3,Orange
4,Orange
...,...
877,Ventura
878,Los Angeles
879,Santa Clara
880,Santa Barbara


## Locode Check

In [92]:
gdf.a1_locode.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: a1_locode
Non-Null Count  Dtype 
--------------  ----- 
881 non-null    object
dtypes: object(1)
memory usage: 13.8+ KB


In [95]:
(gdf>>select(_.a1_locode)>>filter(_.a1_locode.isna()))

Unnamed: 0,a1_locode
433,


In [None]:
gdf.loc[:, gdf.isna().any()]

In [99]:
(gdf[pd.to_numeric(gdf['a1_locode'], errors='coerce').isnull()])>>select(_.a1_imp_agcy_city, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_name,a1_locode
28,Covelo,Round Valley Indians Tribe,
91,Orleans,Karuk Tribe,
159,Biggs,"Biggs, City of",
202,Hawaiian Gardens,"Hawaiian Gardens, City of",
265,San Francisco,Bay Area Toll Authority,
319,Redding,Department of Transportation,
387,Los Angeles,California Department of Transportation,
413,Fremont,"Fremont, City of",
433,Salinas,Monterey County,
636,Nice,Robinson Rancheria,


In [100]:
# for the agencies with no locode, we could coerce the errors to return a "None" value