# ATP Data Exploration

In [1]:
import altair as alt
import altair_saver
import intake
import ipywidgets as widgets
import numpy as np
import pandas as pd
from calitp import to_snakecase
from dla_utils import _dla_utils
from IPython.display import HTML, Image, Markdown
from ipywidgets import interact, interactive
from plotnine import *
from shared_utils import altair_utils, styleguide
from siuba import *



In [2]:
pd.set_option("display.max_columns", 220)

## Reading in w/o utils

In [3]:
main_details = to_snakecase(
    pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/atp/Main Details.xls")
)
project_details = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Project Details.xls"
    )
)

In [4]:
# main_details.info()

In [5]:
# project_details.sample()

In [6]:
# project_details.project_cycle.value_counts()

In [7]:
# project_details>>count(_.project_app_id)>>filter(_.n>1)

In [8]:
# project_details>>group_by(_.project_cycle)>>count(_.project_app_id)>>filter(_.n>1)

* multiple project ids, but no duplicates by project cycle. 

In [9]:
## merging

In [10]:
# df = pd.merge(main_details, project_details, how="outer", on=["project_app_id", "project_cycle"], indicator='matches')

In [11]:
# (df>>filter(_.project_app_id =='1-Mendocino Council of Governments-1')>>select(_.project_app_id,
#                                                                               _.project_cycle,
#                                                                              _.matches,
#                                                                               _.agency_app_num))

In [12]:
# df.info()

### Comparing column names

code help: https://stackoverflow.com/questions/45482755/compare-headers-of-dataframes-in-pandas

In [13]:
# columns in both dfs
main_details.columns.intersection(project_details.columns)

Index(['project_app_id', 'project_cycle', 'awarded'], dtype='object')

In [14]:
# columns in main_details not in project_details
main_details.columns.difference(project_details.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_contact', 'a1_imp_agcy_email',
       'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma', 'a1_imp_agcy_name',
       'a1_imp_agcy_phone', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_letter_of_intent',
       'a1_locode', 'a1_proj_partner_agcy', 'a1_proj_partner_contact',
       'a1_proj_partner_email', 'a1_proj_partner_exists',
       'a1_proj_partner_phone', 'a1_proj_partner_title', 'a2_assem_dist_a',
       'a2_assem_dist_b', 'a2_assem_dist_c', 'a2_congress_dist_a',
       'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_county', 'a2_ct_dist',
       'a2_info_proj_descr', 'a2_info_proj_loc', 'a2_info_proj_name',
       'a2_mop_uza_population', 'a2_mpo', 'a2_output_outcome', 'a2_past_proj',
       'a2_past_proj_qty', 'a2_proj_lat', 'a2_proj_long',
       'a2_proj_scope_summary', 'a2_project_location_map', 'a2_rtpa',
       'a2_senate_dist_a', 'a2_senate_dist_b', 'a2_senatedistc',
       'a3_current

In [15]:
# columns in project_details not in main_details
project_details.columns.difference(main_details.columns)

Index(['a4_act_other_1', 'a4_act_other_1_descr', 'a4_act_other_2',
       'a4_act_other_2_decr', 'a4_after_school', 'a4_bike_classes',
       'a4_bike_gap_pct', 'a4_bike_rodeos', 'a4_bike_train', 'a4_classrooms',
       ...
       'v_other_traffic_calming_imprv_2', 'v_other_traffic_calming_qty_1',
       'v_other_traffic_calming_qty_2', 'v_remove_right_turn_pocket',
       'v_remove_travel_ln', 'v_sig_inter_new_roundabout',
       'v_sig_inter_timing_improv', 'v_speed_feedback_signs',
       'v_un_sig_inter_new_roundabout', 'v_un_sig_inter_new_traf_sig'],
      dtype='object', length=132)

## Reading in w/ utils

In [16]:
import utils

In [17]:
df = utils.read_in_data()

In [18]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20,1,Yes,80,Yes,0,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,0,0,0,1500,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,1500,0,,0,,0,5,0,4,3,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50,0,Yes,50,No,0,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,0,0,0,6336,0,,0,0,0,0,0,0,0,,0,,0,0,2,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,0,0,0,0,,0,,0,38,0,15,16,0,18,3,0,0,,1,6.0,0,18,0,0,Yes,No,No,,0,0,,0,0,8800,0,0,0,0,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50,2,Yes,50,No,0,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,0,0,13752,5748,0,,0,0,0,0,0,0,0,,0,,0,0,0,1,0,,0,0,0,0,,0,,0,0,0,40,2,0,0,0,20,0,0,0,,0,,0,9,0,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,60,0,0,0,Left Turn Arrow,3,Enhanced Crosswalk Unsignalized,3,218,1000,7,0,0,1,0,0,0,,0,0.0,0,7,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,43,0,0,0,Enhance crosswalk (unsignalized),7,Raised Crosswalk,2,189,3455,5,0,0,1,0,0,0,,0,0.0,2,5,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 211 entries, a1_imp_agcy_city to matches
dtypes: category(1), datetime64[ns](2), float64(23), int64(97), object(88)
memory usage: 1.4+ MB


In [20]:
df.matches.value_counts()

both          882
left_only       0
right_only      0
Name: matches, dtype: int64

In [21]:
df.awarded_y.value_counts()

N    882
Name: awarded_y, dtype: int64

### Comparing merged df with cleaned data

In [148]:
#removing columns with agency staff information
columns_to_drop = [
    "a1_imp_agcy_contact",
    "a1_imp_agcy_email",
    "a1_imp_agcy_phone",
    "a1_proj_partner_contact",
    "a1_proj_partner_email",
    "a1_proj_partner_phone",
]

In [24]:
alldata = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllData",
    )
)

In [25]:
alldata = alldata.drop(columns=columns_to_drop)

In [26]:
alldata.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
168,N,CYCLE 5,10,,,,,Infrastructure - Medium,"10-Stockton, City of-1",Main Street and Market Street Complete Streets,San Joaquin,5008,"Stockton, City of","22 E. Weber Avenue, Room 301",Stockton,95202,Associate Civil Engineer,Yes,00149S,10-5008R,No,,,13,,,9,,,5,,,"Install various measures, including 35,550 LF ...",The project is located in Stockton on Main Str...,Project is located within one of the ten large...,SJCOG,No,0,37.96,-121.27,"Currently, Main Street and Market Street consi...",,,,Yes,,Yes,,No,,Yes,Yes,50,5,Yes,50,Yes,0,,0,Yes,No,0,0,,"Promote active transportation, improve connect...",7,0,35550,0,0,,0,0,0,0,0,0,0,,0,,0,0,7,0,0,,0,0,0,0,,0,,0,0,0,0,0,120,0,0,16,0,0,0,,0,,0,95,2063,7,0,0,0,0,0,0,,0,0,0,0,0,0,No,No,No,,0,0,,0,0,35550,2,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,Y,N,N,No


In [27]:
cleaned = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllDataFieldMapping Cleaned",
    )
)

In [28]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_contact,a1_imp_agcy_title,a1_imp_agcy_email,a1_imp_agcy_phone,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_contact,a1_proj_partner_title,a1_proj_partner_email,a1_proj_partner_phone,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,...,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,unnamed:_215,prog__amount__paed__1,prog__amount__pse__1,prog__amount__rw__1,prog__amount__con__1,prog__amount__con_ni__1,unnamed:_221,fund_year_1,prog__amount__paed__2,prog__amount__pse__2,prog__amount__rw__2,prog__amount__con__2,prog__amount__con_ni__2,unnamed:_228,fund_year_2,prog__amount__paed__3,prog__amount__pse__3,prog__amount__rw__3,prog__amount__con__3,prog__amount__con_ni__3,unnamed:_235,fund_year_3,prog__amount__paed__4,prog__amount__pse__4,prog__amount__rw__4,prog__amount__con__4,prog__amount__con_ni__4,unnamed:_242,fund_year_4,unnamed:_244,unnamed:_245,unnamed:_246,unnamed:_247,unnamed:_248,unnamed:_249,unnamed:_250,unnamed:_251,unnamed:_252
191,N,5,3,,,,,Infrastructure - Medium,"3-Folsom, City of-1",Riley Street Sidewalks Project,SAC,5288,"Folsom, City of",50 E Natoma Street,Folsom,95630,Ryan Chance,Senior Civil Engineer,rchance@folsom.ca.us,916-461-6713,Yes,00461S,03-5288R,No,,,,,,6,6,,,7,7,,,1,1,,,In the City of Folsom on Riley Street between ...,"Sacramento County, City of Folsom, Riley Stree...",Project is located within one of the ten large...,SACOG,No,0,38.67,-121.17,"Riley Street in Folsom, between Sutter Street ...",,,,No,,Yes,,Yes,,No,No,0,1,Yes,100,Yes,0,,0,Yes,No,0,0,,"Sidewalk gap closures, curb ramps and crosswal...",0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,75,0,0,...,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [29]:
# filter_col = [col for col in cleaned if col.startswith('unnamed')]

In [30]:
# remove columns that have some agency specific information
cleaned = cleaned.drop(columns=columns_to_drop)

In [31]:
# remove columns that were manually entered - last 34 columns
cleaned = cleaned.drop(columns=(cleaned.iloc[:, 199:]))

In [32]:
# remove columns that are blank and unnamed
# cleaned=cleaned.drop(columns=filter_col)

In [33]:
# cleaned.columns.get_loc("original_prog__amt___pa_ed_")

In [34]:
# (cleaned.iloc[:, 199:].columns.tolist())

In [35]:
# making sure they are null
# (cleaned.iloc[1:, 199:]).info()

In [36]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
149,N,5,7,,,,,Non-Infrastructure,"7-Los Angeles, City of-1",Safe Routes to School Active Transportation Ed...,LA,5006,"Los Angeles, City of","100 N. Main Street, 9th Floor",Los Angeles,90012,Safe Routes to School Director,Yes,07-00152S,07-5006F15,Yes,Los Angeles Unified School District / Division...,"Coordinator: Health, Social, Emotional Learnin...","53, 59",53,59.0,,"34, 37, 40",34,37.0,40.0,"30, 33",30,33.0,,"A highly innovative, instruction-centric model...",The one-quarter mile radius from the 39 LAUSD ...,Project is located within one of the ten large...,SCAG,Yes,10,34.01,-118.26,Jefferson/South Central Community of Schools (...,,,,No,,Yes,,No,,Yes,No,0,13,No,0,Yes,0,,0,Yes,No,No,Yes,"Mobility Plan 2035, Vision Zero LA, Green New ...","7,290 PE bicycle skills classes, 180 Active Tr...",0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,,0,0,0,0,0,0,No,No,No,,0,0,,0,0,0,0,0,0,0,N,0,Y,11,Y,89,N,0,,0,N,,0,0,27,0,2,0,0,6,7290,0,450,252,0,0,0,0,186,0,0,180,Active Transportation STEM-Curriculum Classes,390,Student Safety Leadership Sessions,N,N,N,N,N,N,,,N,N,Y,Y,N,N,,N,N,N,N,No


### How do the merged and cleaned up columns match up?

In [37]:
df.columns.intersection(cleaned.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma',
       'a1_imp_agcy_name', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_proj_partner_agcy',
       'a1_proj_partner_exists',
       ...
       'a4_collab_non_profit', 'a4_collab_schools', 'a4_collab_pub_works',
       'a4_collab_other', 'a4_colab_other_descr', 'a4_plan_ped',
       'a4_plan_bike', 'a4_plan_atp', 'a4_plan_school_routes',
       'a4_row_open_street_demo'],
      dtype='object', length=191)

In [38]:
# columns in df not in cleaned
df.columns.difference(cleaned.columns)

Index(['a1_letter_of_intent', 'agency_app_num', 'app_fk', 'app_pk',
       'attch_addtl_attachments', 'attch_app_sig_page',
       'attch_conditions_photos', 'attch_conditions_project_map',
       'attch_engineeers_checklist', 'attch_exhibit22_plan',
       'attch_letters_of_support', 'attch_link', 'attch_ni_workplan',
       'attch_project_estimate', 'awarded_x', 'awarded_y',
       'completed_pdf_form', 'details_datetime_stamp', 'main_datetime_stamp',
       'matches'],
      dtype='object')

In [39]:
# columns in cleaned not in df
cleaned.columns.difference(df.columns)

Index(['#', 'assembly_district', 'atp_id', 'awarded', 'congressional_district',
       'ppno', 'ppno_1', 'senate_district'],
      dtype='object')

In [40]:
len(cleaned)

454

## Assembly Congressional and Senate Districts

In [308]:
(
    cleaned
    >> select(
        _.a1_imp_agcy_name,
        _.assembly_district,
        _.a2_assem_dist_a,
        _.a2_assem_dist_b,
        _.a2_assem_dist_c,
        _.congressional_district,
        _.a2_congress_dist_a,
        _.a2_congress_dist_b,
        _.a2_congress_dist_c,
        _.senate_district,
        _.a2_senate_dist_a,
        _.a2_senate_dist_b,
        _.a2_senatedistc,
    )
).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   a1_imp_agcy_name        454 non-null    object 
 1   assembly_district       454 non-null    object 
 2   a2_assem_dist_a         454 non-null    int64  
 3   a2_assem_dist_b         62 non-null     float64
 4   a2_assem_dist_c         29 non-null     float64
 5   congressional_district  454 non-null    object 
 6   a2_congress_dist_a      454 non-null    int64  
 7   a2_congress_dist_b      58 non-null     float64
 8   a2_congress_dist_c      26 non-null     float64
 9   senate_district         454 non-null    object 
 10  a2_senate_dist_a        454 non-null    int64  
 11  a2_senate_dist_b        48 non-null     float64
 12  a2_senatedistc          26 non-null     float64
dtypes: float64(6), int64(3), object(4)
memory usage: 46.2+ KB


In [309]:
ad = (
    cleaned
    >> select(
        _.a1_imp_agcy_name,
        _.assembly_district,
        _.a2_assem_dist_a,
        _.a2_assem_dist_b,
        _.a2_assem_dist_c,
        # _.congressional_district,
        # _.a2_congress_dist_a,
        # _.a2_congress_dist_b,
        # _.a2_congress_dist_c,
        # _.senate_district,
        # _.a2_senate_dist_a,
        # _.a2_senate_dist_b,
        # _.a2_senatedistc,
    )
)

In [310]:
ad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   a1_imp_agcy_name   454 non-null    object 
 1   assembly_district  454 non-null    object 
 2   a2_assem_dist_a    454 non-null    int64  
 3   a2_assem_dist_b    62 non-null     float64
 4   a2_assem_dist_c    29 non-null     float64
dtypes: float64(2), int64(1), object(2)
memory usage: 17.9+ KB


In [311]:
(
    ad
    >> filter(_.a2_assem_dist_a <= 4)
)

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c
6,Butte County,"1, 3",1,3.00,
8,"Santa Barbara, City of",37,0,3.00,7.00
12,"San Bernardio, City of",40,4,0.00,
28,Round Valley Indians Tribe,2,2,,
30,"Santa Barbara, City of",37,0,3.00,7.00
...,...,...,...,...,...
401,"Coachella, City of",56,0,5.00,6.00
411,"Marysville, City of",3,3,,
413,Mendocino Council of Governments,2,2,,
445,Siskiyou County Transportation Commission,1,1,,


In [312]:
## Need to join two columns together if they have values

In [313]:
## code help: https://stackoverflow.com/questions/52889130/how-to-remove-zeros-after-decimal-from-string-remove-all-zero-after-dot
#ad["a2_assem_dist_b"].map("{0:g}".format)

In [314]:
# ad["assem_dist_combined"] =ad["a2_assem_dist_b"].map("{0:g}".format) + ad[
#     "a2_assem_dist_c"
# ].map("{0:g}".format)

In [315]:
ad["assem_dist_combined"] = ""

In [316]:
## doesnt work
# cols = ['a2_assem_dist_b', 'a2_assem_dist_c']
# for col in cols:
#     if (row.a2_assem_dist_b < 10) and (row.a2_assem_dist_c < 10):
#         ad['assem_dist_combined'] = (ad['a2_assem_dist_b'].map('{0:g}'.format) + ad['a2_assem_dist_c'].map('{0:g}'.format))

In [317]:
##code help:
# https://stackoverflow.com/questions/49091259/pandas-looping-through-rows-and-skipping-over-rows
# https://stackoverflow.com/questions/36774602/concatenate-two-numerical-values-to-make-a-new-column-using-pandas

In [318]:
# for index, row in df.iterrows(df):
#     if (row.a2_assem_dist_b < 10) and (row.a2_assem_dist_c < 10):
#         df['assem_dist_combined'] = df['a2_assem_dist_b'].map('{0:g}'.format) + df['a2_assem_dist_c'].map('{0:g}'.format)
#     else:
#         df['assem_dist_combined'] = 0

In [319]:
(ad>> filter(_.a2_assem_dist_c.notnull()))

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined
8,"Santa Barbara, City of",37,0,3.0,7.0,
30,"Santa Barbara, City of",37,0,3.0,7.0,
36,"West Covina, City of","48, 55",48,,55.0,
47,"Santa Barbara, City of",37,0,3.0,7.0,
55,"Santa Barbara, City of",37,0,3.0,7.0,
89,"Delano, City of",32,0,3.0,2.0,
96,"Delano, City of",32,0,3.0,2.0,
112,Alameda County,25,0,2.0,5.0,
119,San Diego Association of Governments (SANDAG),"80, 79, 78",80,79.0,78.0,
122,Santa Barbara County,35,0,3.0,5.0,


In [320]:
## Code help: https://stackoverflow.com/questions/56119307/pandas-conditionally-concat-two-columns
mask = (ad["a2_assem_dist_b"] < 10).fillna(False)

ad["assem_dist_combined2"] = ad.loc[mask, "a2_assem_dist_b"].map("{0:g}".format) + ad[
    "a2_assem_dist_c"
].map("{0:g}".format)

In [321]:
(ad>> filter(_.a2_assem_dist_c.notnull()))

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2
8,"Santa Barbara, City of",37,0,3.0,7.0,,37.0
30,"Santa Barbara, City of",37,0,3.0,7.0,,37.0
36,"West Covina, City of","48, 55",48,,55.0,,
47,"Santa Barbara, City of",37,0,3.0,7.0,,37.0
55,"Santa Barbara, City of",37,0,3.0,7.0,,37.0
89,"Delano, City of",32,0,3.0,2.0,,32.0
96,"Delano, City of",32,0,3.0,2.0,,32.0
112,Alameda County,25,0,2.0,5.0,,25.0
119,San Diego Association of Governments (SANDAG),"80, 79, 78",80,79.0,78.0,,
122,Santa Barbara County,35,0,3.0,5.0,,35.0


In [322]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         ad["a2_assem_dist_b"], ad["assem_dist_combined2"])

In [323]:
ad["assem_dist_combined2"] = np.where(
        ad.assem_dist_combined2.isnull(),
        (ad["a2_assem_dist_b"].map("{0:g}".format) + ', ' + ad["a2_assem_dist_c"].map("{0:g}".format)),
         ad["assem_dist_combined2"])

In [324]:
(ad>> filter(_.a2_assem_dist_c.notnull()))

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2
8,"Santa Barbara, City of",37,0,3.0,7.0,,37
30,"Santa Barbara, City of",37,0,3.0,7.0,,37
36,"West Covina, City of","48, 55",48,,55.0,,"nan, 55"
47,"Santa Barbara, City of",37,0,3.0,7.0,,37
55,"Santa Barbara, City of",37,0,3.0,7.0,,37
89,"Delano, City of",32,0,3.0,2.0,,32
96,"Delano, City of",32,0,3.0,2.0,,32
112,Alameda County,25,0,2.0,5.0,,25
119,San Diego Association of Governments (SANDAG),"80, 79, 78",80,79.0,78.0,,"79, 78"
122,Santa Barbara County,35,0,3.0,5.0,,35


In [325]:
ad>>filter(_.assem_dist_combined2.str.contains('nan'))

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2
0,Merced County,21,21,,,,"nan, nan"
1,"Santa Ana, City of",69,69,,,,"nan, nan"
2,"Pacifica, City of",22,22,,,,"nan, nan"
3,"Santa Ana, City of",69,69,,,,"nan, nan"
4,"Santa Ana, City of",69,69,,,,"nan, nan"
...,...,...,...,...,...,...,...
449,"San Rafael, City of",10,10,,,,"nan, nan"
450,"Stockton, City of",13,13,,,,"nan, nan"
451,"Long Beach, City of",70,70,,,,"nan, nan"
452,"Napa, City of",4,4,,,,"nan, nan"


In [326]:
(ad[~ad.assem_dist_combined2.str.contains(", nan")])

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2
6,Butte County,"1, 3",1,3.0,,,3nan
8,"Santa Barbara, City of",37,0,3.0,7.0,,37
12,"San Bernardio, City of",40,4,0.0,,,0nan
30,"Santa Barbara, City of",37,0,3.0,7.0,,37
36,"West Covina, City of","48, 55",48,,55.0,,"nan, 55"
47,"Santa Barbara, City of",37,0,3.0,7.0,,37
55,"Santa Barbara, City of",37,0,3.0,7.0,,37
89,"Delano, City of",32,0,3.0,2.0,,32
96,"Delano, City of",32,0,3.0,2.0,,32
112,Alameda County,25,0,2.0,5.0,,25


In [327]:
#df['Col4'] = df[['a2_assem_dist_b', 'a2_assem_dist_c']].apply(lambda x: ','.join(x.dropna()), axis=1)

In [328]:
## another attempt:

## this will combine all 
## code help: https://stackoverflow.com/questions/55526620/how-to-combine-non-null-entries-of-columns-of-a-dataframe-into-a-new-column
#df["assem_dist_combined3"] = df.agg(lambda x: x.dropna().str.cat(sep=','), axis=1)

## this combines set columns
## code help: https://stackoverflow.com/questions/45787782/combine-multiple-columns-in-pandas-excluding-nans
cols = ['a2_assem_dist_b', 'a2_assem_dist_c']
ad["assem_dist_combined3"] = ad[cols].agg(lambda x: x.dropna().tolist(), axis=1)


In [329]:
(ad[~ad.assem_dist_combined2.str.contains(", nan")])

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2,assem_dist_combined3
6,Butte County,"1, 3",1,3.0,,,3nan,[3.0]
8,"Santa Barbara, City of",37,0,3.0,7.0,,37,"[3.0, 7.0]"
12,"San Bernardio, City of",40,4,0.0,,,0nan,[0.0]
30,"Santa Barbara, City of",37,0,3.0,7.0,,37,"[3.0, 7.0]"
36,"West Covina, City of","48, 55",48,,55.0,,"nan, 55",[55.0]
47,"Santa Barbara, City of",37,0,3.0,7.0,,37,"[3.0, 7.0]"
55,"Santa Barbara, City of",37,0,3.0,7.0,,37,"[3.0, 7.0]"
89,"Delano, City of",32,0,3.0,2.0,,32,"[3.0, 2.0]"
96,"Delano, City of",32,0,3.0,2.0,,32,"[3.0, 2.0]"
112,Alameda County,25,0,2.0,5.0,,25,"[2.0, 5.0]"


In [330]:
ad>>filter(_.a2_assem_dist_b>=10) 

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2,assem_dist_combined3
18,"Bakersfield, City of","32, 34",32,34.0,,,"34, nan",[34.0]
19,"Bakersfield, City of","32, 34",32,34.0,,,"34, nan",[34.0]
25,"Bakersfield, City of","32, 34",32,34.0,,,"34, nan",[34.0]
38,Alameda County,"18, 20",18,20.0,,,"20, nan",[20.0]
45,"Lynwood, City of","63, 64",63,64.0,,,"64, nan",[64.0]
46,"Bakersfield, City of","32, 34",32,34.0,,,"34, nan",[34.0]
80,"Bakersfield, City of","32, 34",32,34.0,,,"34, nan",[34.0]
99,"Fresno, City of","23, 31",23,31.0,,,"31, nan",[31.0]
119,San Diego Association of Governments (SANDAG),"80, 79, 78",80,79.0,78.0,,"79, 78","[79.0, 78.0]"
121,"Oxnard, City of","37, 44",37,44.0,,,"44, nan",[44.0]


Requirements for function:
* when `a2_assem_dist_a` == 0 AND `a2_assem_dist_b` & `assem_dist_c` are less than 10, **then combine of `2_assem_dist_b` & `assem_dist_c` to one number.**
* when `a2_assem_dist_a` is less than 10 AND `a2_assem_dist_b` is less than 10 AND `assem_dist_c` is null, **then take combine `2_assem_dist_a` & `assem_dist_b` (can be one number or two)**
* when `a2_assem_dist_a` == 1 AND `a2_assem_dist_b` is less than 10, **then combine `a2_assem_dist_a` & `2_assem_dist_b` with a comma**
* when `a2_assem_dist_a` is notnull AND `a2_assem_dist_b` & `assem_dist_c` are null,** then `ssembly_district`== `2_assem_dist_a`**
* when `a2_assem_dist_a` & `a2_assem_dist_b` is >= 10 AND `assem_dist_c` is null, **then take combination of `2_assem_dist_a` & `assem_dist_b` with a comma**
* when `a2_assem_dist_a` & `a2_assem_dist_c` is >= 10 AND `assem_dist_b` is null, **then take combination of `2_assem_dist_a` & `assem_dist_c` with a comma**


In [349]:
def format_districts(df):
    
    cols = ['a2_assem_dist_a','a2_assem_dist_b','a2_assem_dist_c']
    for col in cols:
        df[col] = df[col].apply(pd.to_numeric, errors='coerce')
    #     df[col] = df[col].map('{0:g}'.format)
    
    def district_status(row):
        if (row.a2_assem_dist_a == 0) and (row.a2_assem_dist_b < 10) and (row.a2_assem_dist_c < 10):
            return df["a2_assem_dist_b"].map('{0:g}'.format) + df["a2_assem_dist_c"].map('{0:g}'.format)
        elif (row.a2_assem_dist_a < 10) and (row.a2_assem_dist_b < 10) and (row.a2_assem_dist_c.isna()):
            return df["a2_assem_dist_a"].map('{0:g}'.format) + df["a2_assem_dist_b"].map('{0:g}'.format)
        elif (row.a2_assem_dist_a>=1) and (row.a2_assem_dist_b.isna()) and (row.a2_assem_dist_c.isna()):
            return df["a2_assem_dist_a"].map('{0:g}'.format)
        elif (row.a2_assem_dist_a >=10) and (row.a2_assem_dist_b>=10) and (row.a2_assem_dist_c.isna()):
            return (df["a2_assem_dist_a"].map('{0:g}'.format) + ', ' + df["a2_assem_dist_b"].map('{0:g}'.format))
        elif (row.a2_assem_dist_a >=10) and (row.a2_assem_dist_c>=10) and (row.a2_assem_dist_b.isnull()):
            return  (df["a2_assem_dist_a"].map('{0:g}'.format) + ', ' + df["a2_assem_dist_c"].map('{0:g}'.format))
        else:
            return "Add Manually"
    
    df["assem_dist_combined5"] = df.apply(lambda x: district_status(x), axis=1)
  
    return df
    

In [364]:
#format_districts(ad)

In [351]:
(ad>>filter(_.a2_assem_dist_b!=('nan'))).sample(5)

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assem_dist_combined,assem_dist_combined2,assem_dist_combined3
385,"Los Angeles, City of",46,46,,,,"nan, nan",[]
281,"Chula Vista, City of",80,80,,,,"nan, nan",[]
201,"Roseville, City of",4,4,,,,"nan, nan",[]
373,"South San Francisco, City of",22,22,,,,"nan, nan",[]
219,"National City, City of",80,80,,,,"nan, nan",[]


In [356]:
df['a2_assem_dist_a'] = df['a2_assem_dist_a'].apply(pd.to_numeric, errors='coerce')

## Null columns

In [103]:
# finding columns with all null and dropping for now. will keep in script

In [104]:
alldatanull = alldata.columns[alldata.isna().all()].tolist()

In [105]:
alldata = alldata.drop(columns=alldatanull)

In [106]:
dfnull = df.columns[df.isna().all()].tolist()
df = df.drop(columns=dfnull)

In [107]:
cleanednull = cleaned.columns[cleaned.isna().all()].tolist()
cleaned = cleaned.drop(columns=cleanednull)

## Changing Column Types

In [108]:
df.a2_mpo.value_counts()

SCAG        315
MTC         132
SANDAG       62
Caltrans     52
SACOG        52
SJCOG        38
TCAG         37
AMBAG        34
KCOG         31
COFCG        31
SBCAG        19
SLOCOG       15
BCAG         14
TMPO         13
SRTA         11
StanCOG      11
MCTC          7
KCAG          3
MCAG          3
CVAG          2
Name: a2_mpo, dtype: int64

In [109]:
df.details_datetime_stamp.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: details_datetime_stamp
Non-Null Count  Dtype         
--------------  -----         
882 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 46.1 KB


In [110]:
compare_col = np.where(
    df["main_datetime_stamp"] == df["details_datetime_stamp"], True, False
)
df["compare_datetime"] = compare_col
df.compare_datetime.value_counts()

True     763
False    119
Name: compare_datetime, dtype: int64

In [111]:
# some datetimes are seconds different others are larger
(
    df
    >> filter(_.compare_datetime == False)
    >> select(_.details_datetime_stamp, _.main_datetime_stamp)
)

Unnamed: 0,details_datetime_stamp,main_datetime_stamp
27,2020-09-10 16:23:42,2020-09-10 16:23:41
37,2020-09-11 12:48:11,2020-09-11 12:48:10
55,2020-09-10 14:14:03,2020-09-10 14:14:02
67,2020-09-14 19:20:56,2020-09-14 19:20:55
75,2020-09-11 16:29:11,2020-09-11 16:29:10
...,...,...
869,2022-06-16 12:10:18,2022-06-21 11:28:23
871,2022-06-16 12:12:24,2022-06-16 12:12:23
872,2022-06-16 12:15:24,2022-06-16 12:15:23
874,2022-06-16 10:57:37,2022-06-16 10:57:36


In [112]:
df = df.drop(columns="compare_datetime")

In [113]:
df.sample(2)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assem_dist_combined,assem_dist_combined2,assem_dist_combined4
183,Rancho Cordova,03-5482R,Yes,"Rancho Cordova, City of",00266S,2729 Prospect Park Drive,Senior Civil Engineer,95670,,,No,,8,,,7,,,Sacramento,3,"Install bicycle lanes, pedestrian crossing, ad...",On Olson Drive from Folsom Boulevard to Zinfan...,Olson Drive Corridor Safety Enhancement Project,Project is located within one of the ten large...,SACOG,Yes,1,38.59,-121.28,Olson Island is a commercial district within t...,,4,8.0,,No,Yes,Yes,Yes,Infrastructure - Small,Yes,52,0,Yes,48,No,0,,0,No,2,2412,Part C Att K - Other R.pdf,Part C Att A SignatureR.pdf,Part C Att E Photos-Existing V2R.pdf,PART C Att D Olson Corr Project Impvt Plan V7R...,Part C Att B Engineers Checklist R.pdf,Part C Att I Support Ltrs Olson DriveR.pdf,,Part C Att F Eng Est Olson V3R.pdf,2020-09-15 15:36:46,"3-Rancho Cordova, City of-2",CYCLE 5,N,5482,0,0,,"This project will construct 6,820 LF Class II ...",Yes,0,0,6820,0,0,0,0,0,0,0,0,0,,0,,0,6820,0,0,0,0,0,0,0,,0,,0,0,0,13,1,0,0,1,2,0,0,0,,0,,0,0,0,0,0,0,0,5,0,0,,0,0.0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,7,0,0,2412,2020-09-15 15:36:46,N,0,N,0,N,0,N,0,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,,"nan, nan",[]
106,Lompoc,05-5080R,Yes,"Lompoc, City of",00269S,100 Civic Center Plaza,Civil Engineering Associate III,93436,,,No,,35,,,24,,,Santa Barbara,5,Construct Sidewalk and ADA compliant pedestria...,"M St - Ocean Ave to Laurel Ave, N St - Walnut ...",Lompoc High School Corridor Sidewalk Infill an...,Project is located within one of the ten large...,SBCAG,Yes,2,34.64,-120.46,Barriers and safety issues prevent mobility of...,,19,,,Yes,No,No,No,Infrastructure - Small,No,0,2,Yes,100,Yes,0,,0,No,1,2330,Atch K - Warrant Checks - 4L-101 (CA) MUTCD Fl...,Atch A - Signature-Page_Signed.pdf,Atch E - Photos of Existing Conditions.pdf,"Atch 8,D - Project Plans_Gaps_Connections_Barr...",Atch B - Engr-Checklist signed_sealed.pdf,Atch I - Letters of Support.pdf,,Atch F - Project Cost Estimate.pdf,2020-09-15 13:32:51,"5-Lompoc, City of-1",CYCLE 5,N,5080,0,0,Pedestrian and Bicycle Master Plan (ATP),Installation of 4 Rapid Flashing Beacon Crossw...,Yes,0,0,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,,0,,0,0,0,100,1,0,0,1,5,0,1750,0,,0,,0,0,0,0,0,0,0,0,0,0,,0,0.0,3,0,3,0,No,Yes,No,,0,0,,0,0,0,0,0,0,0,2330,2020-09-15 13:32:51,N,0,N,0,N,0,N,0,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,,"nan, nan",[]


In [114]:
df.columns.tolist()

['a1_imp_agcy_city',
 'a1_imp_agcy_fed_ma_num',
 'a1_imp_agcy_ma',
 'a1_imp_agcy_name',
 'a1_imp_agcy_state_ma_num',
 'a1_imp_agcy_street',
 'a1_imp_agcy_title',
 'a1_imp_agcy_zip',
 'a1_letter_of_intent',
 'a1_proj_partner_agcy',
 'a1_proj_partner_exists',
 'a1_proj_partner_title',
 'a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_county',
 'a2_ct_dist',
 'a2_info_proj_descr',
 'a2_info_proj_loc',
 'a2_info_proj_name',
 'a2_mop_uza_population',
 'a2_mpo',
 'a2_past_proj',
 'a2_past_proj_qty',
 'a2_proj_lat',
 'a2_proj_long',
 'a2_proj_scope_summary',
 'a2_rtpa',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_plan_active_trans_exists',
 'a3_plan_bicycle_exists',
 'a3_plan_ped_exists',
 'a3_plan_srts_exists',
 'a3_proj_type',
 'a3_st_bicycle_applies',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_applies',
 'a3_st_ped_pct',
 'a3_st_srts',
 'a3_trail_elig_cost',
 'a3_trail_fed

### Add Geometry

In [115]:
from dla_utils import _dla_utils
from shared_utils import geography_utils

In [116]:
gdf = geography_utils.create_point_geometry(
    df, longitude_col="a2_proj_long", latitude_col="a2_proj_lat"
)

In [131]:
gdf.sample(1)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assem_dist_combined,assem_dist_combined2,assem_dist_combined4,geometry
641,Visalia,06-5044R,Yes,"Visalia, City of",00005S,315 E Acequia Ave,City Engineer,93291,,,No,,26,,,22,,,Tulare,6,Reconstruction of a 6.1-mile Class 1 multi-use...,The GVC project is a 6.1-mile long Class 1 mul...,Goshen-Visalia Corridor (GVC) Improvement Project,Project is located within one of the ten large...,TCAG,No,0,,,The Goshen-Visalia Corridor (GVC) project is a...,,16,,,Yes,No,No,No,Infrastructure - Large,Yes,50,6,Yes,50,Yes,0,No,0,Yes,1,3712,Attachment K_Additional Information.pdf,Attachment-A-Signature Page.pdf,Attachment E_Existing Conditions.pdf,Attachment-D Improvement Plans.pdf,Attachment B_GVC_Signed.pdf,Attachment I.pdf,,Attachment-F-Project-Estimate.pdf,2022-06-15 16:58:31,"6-Visalia, City of-1",CYCLE 6,N,5044,0,0,Central Visalia Traffic Safety Action Plan,Improve pedestrian/bicyclist safety and mobili...,Yes,0,1500,0,0,0,0,0,0,0,0,0,0,Extend Multi use path Fill Gap,0,Reconstruct existing multi use trail,0,0,0,0,0,30000,0,0,0,Reconstruct existing multi use trail,0,,0,0,88,2,0,9,0,0,1,21200,0,0,Reconstruct existing multi use trail,0,,0,18,0,9,1,2,10,0,14,600,Native Resilient species,0,,0,2,0,0,No,Yes,No,Mountable bulb outs ramps,0,0,Narrower Travel Lanes,0,0,0,0,0,0,0,3712,2022-06-15 16:58:31,N,0,N,0,N,0,N,0,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,,"nan, nan",[],POINT EMPTY


### Change columns to integers

In [118]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x

In [119]:
columns_to_int = [
    "a1_locode",
    "a2_senatedistc",
    "a2_senate_dist_b",
    "a2_assem_dist_b",
    "a2_assem_dist_c",
    "a2_congress_dist_b",
    "a2_congress_dist_c",
    "a2_proj_lat",
    "a2_proj_long",
    "a2_senate_dist_b",
    "a2_senatedistc",
    "p_un_sig_inter_new_roundabout",
    "a4_emp_based_pct",
    "a4_le_methods",
    "a4_srts_le",
    "a1_locode",
    "a2_senatedistc",
    "a2_senate_dist_b",
]

In [120]:
# gdf[columns_to_int] = gdf[columns_to_int].apply(get_num)

In [121]:
for col in columns_to_int:
    gdf[col] = gdf[col].apply(get_num)

In [122]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 204 entries, a1_imp_agcy_city to geometry
dtypes: category(1), datetime64[ns](2), float64(12), geometry(1), int64(97), object(91)
memory usage: 1.4+ MB


In [123]:
gdf.select_dtypes("int64")

Unnamed: 0,a1_imp_agcy_zip,a2_assem_dist_a,a2_congress_dist_a,a2_ct_dist,a2_past_proj_qty,a2_senate_dist_a,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_pct,a3_trail_elig_cost,a3_trail_trans_pct,agency_app_num,app_pk,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_qty_1,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_improv_qty_1,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_qty_1,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,a4_reg_init_pct,a4_com_init_pct,a4_safe_route_pct,a4_fl_mile_pct,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_training,a4_act_other_1,a4_act_other_2
0,95340,21,16,10,0,12,20,1,80,0,0,1,1802,0,0,0,1500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,1500,0,0,0,5,0,4,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1802,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,92702,69,46,12,2,34,50,0,50,0,0,4,1811,0,0,0,6336,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,38,0,15,16,0,18,3,0,0,1,0,18,0,0,0,0,0,0,8800,0,0,0,0,1811,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,94044,22,14,4,0,13,50,2,50,0,0,1,1804,0,0,13752,5748,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,40,2,0,0,0,20,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1804,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,92702,69,46,12,2,34,0,5,100,0,0,13,1822,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,0,0,0,0,60,0,0,0,3,3,218,1000,7,0,0,1,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,1822,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,92702,69,46,12,4,34,0,5,100,0,0,14,1823,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,0,0,0,0,43,0,0,0,7,2,189,3455,5,0,0,1,0,0,0,0,2,5,0,0,0,0,0,0,0,0,0,0,0,1823,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,91362,0,0,7,0,0,95,0,5,0,0,1,3192,0,0,0,0,24820,0,0,0,0,0,0,0,0,0,324,2,2,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,265,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
878,91733,49,32,7,5,22,75,4,25,0,0,1,3859,0,0,5100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,23000,4,0,16,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,3859,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
879,95113,25,19,4,1,15,60,0,40,0,0,3,3860,9,0,0,0,6840,0,0,0,0,0,0,0,8,6,0,0,0,4,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,60,0,48,6840,8,0,0,2,0,0,0,0,0,4,4,6840,0,0,0,0,0,0,0,0,0,3860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
880,93101,0,0,5,2,0,25,1,75,0,0,2,3845,2,0,1700,22410,0,0,0,0,0,0,0,0,10,24,0,0,0,0,0,0,0,0,0,0,0,0,20,1,0,0,1,110,0,2820,0,0,0,23,0,0,0,0,0,0,0,0,0,0,42,17,2700,0,0,0,0,0,0,0,0,0,3845,50,0,50,0,0,2,4,4,0,0,0,0,0,4,2,0,0,0,0,0,0,0,0


In [124]:
gdf.select_dtypes("object")

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_proj_scope_summary,a2_rtpa,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_ped_applies,a3_st_srts,a3_trail_fed_funding,a3_trails,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_other_bike_improv_1,b_other_bike_improv_2,m_other_trail_imprv_1,m_other_trail_imprv_2,p_other_ped_imprv_1,p_other_ped_imprv_2,p_amenities_shade_tree_type,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_other_traffic_calming_imprv_2,a4_reg_init,a4_com_init,a4_safe_route,a4_fl_mile,a4_other_ni,a4_other_ni_descr,a4_act_other_1_descr,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,assem_dist_combined,assem_dist_combined2,assem_dist_combined4
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,,,No,,Merced,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,The Planada Sidewalk Infill Project is located...,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,Yes,,No,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,,,,,No,No,Yes,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,This project will implement a Class 3 bicycle ...,,Yes,Yes,No,Yes,Infrastructure - Medium,Yes,Yes,No,,No,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,,,No,,San Mateo,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,The project will install a combination of Clas...,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,No,,No,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,Left Turn Arrow,Enhanced Crosswalk Unsignalized,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,Enhance crosswalk (unsignalized),Raised Crosswalk,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Thousand Oaks,07-5292F15,Yes,"Thousand Oaks, City of",00252,2100 Thousand Oaks Blvd.,Transportation Planner,,,No,,Ventura,"Construction funding for Class IV bikelanes, ...","In the City of Thousand Oaks, Lynn Road betwee...",Lynn Road Bike Lanes and Pedestrain Improvements,Project is located within one of the ten large...,SCAG,No,The project is located on 4.5-miles of Lynn R...,,Yes,No,No,No,Infrastructure - Small,Yes,Yes,No,,No,ATTACHMENT K.pdf,Attachement A Signed.pdf,photoskn.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,"7-Thousand Oaks, City of-1",CYCLE 6,N,5392,No,Yes,Local Road Safety Plan,"265' new sidewalk, 2 rapid flashing beacons, 1...",Yes,,,,,,,,No,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,44,"[4.0, 4.0]"
878,South El Monte,,Yes,"South El Monte, City of",07-5352S21,1415 Santa Anita Avenue,Community Development Director,Partner Agency Letter of Intent.pdf,City of El Monte,Yes,City Engineer,Los Angeles,Construct Class II bike lane segments; install...,Merced Avenue from Garvey Avenue to Fern Stree...,Merced Avenue Greenway,Project is located within one of the ten large...,SCAG,Yes,The project will implement bicyclist/pedestria...,,No,Yes,No,No,Infrastructure - Small,Yes,Yes,No,,No,Att K - Support Docs.pdf,Att A - Signature Page.pdf,Att E - Photos of Existing Conditions.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,"7-South El Monte, City of-1",CYCLE 6,N,5352,0,0,,Construct 0.97-mile Class II bike path; 4 enha...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
879,San Jose,04-5005F15,Yes,"San Jose, City of",00200S,200 E Santa Clara St,Senior Transportation Specialist,,,No,,Santa Clara,This project will decouple 2nd and 3rd street ...,The project is in SoFA arts district in southw...,2nd & 3rd Street De-Coupling and Complete Stre...,Project is located within one of the ten large...,MTC,Yes,"The City of San José, through its Downtown Tra...",,No,Yes,No,No,Infrastructure - Large,Yes,Yes,No,,No,attachment k.pdf,Attachment-A-Signature-Page (1)_jr (1).pdf,Attachment_G_Site_Photos.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,"4-San Jose, City of-3",CYCLE 6,N,5005,0,0,"Emerging mobility Action Plan, Carbon Neutral ...",Project constructs approximately 6840 feet of ...,Yes,Bike Ramps,Raised Intersections,,,Fully Bulbed (all 4 corners),,,Yes,No,No,Conversion of 1 to 2 way operation,<---- 0.68 miles,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,,"nan, nan",[]
880,Santa Barbara,05-5951R,Yes,Santa Barbara County,00100S,123 E. Anapuma St,Alternative Transportation Manager,,,No,,Santa Barbara,"Curb extensions, sidewalks and crosswalks for ...",Unincorporated neighborhood located south of E...,Isla Vista Bike and Pedestrian Improvements Pr...,Project is located outside one of the large MP...,SBCAG,Yes,"Isla Vista is a place like no other. 15,733 pe...",,Yes,No,No,No,Infrastructure + NI - Medium,Yes,Yes,Yes,,No,,Attachment A_Signature Page - 2022.pdf,Existing Conditions Photos.pdf,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5-Santa Barbara County-2,CYCLE 6,N,5951,0,0,Regional Transportation Plan,"Curb extensions, sidewalks, and bicycle networ...",Yes,Bike left-hand turn lanes,Class 2 conflict / intersection striping,,,,,,No,No,Yes,,,Y,N,Y,N,N,,,,N,N,Y,Y,N,N,,"Spanish, Mandarin",Y,N,Y,N,N,N,,N,N,N,N,No,N,,37,"[3.0, 7.0]"


In [125]:
gdf.select_dtypes("float64")

Unnamed: 0,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_b,a2_congress_dist_c,a2_proj_lat,a2_proj_long,a2_senate_dist_b,a2_senatedistc,p_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_le_methods,a4_srts_le
0,,,,,37.00,120.00,,,0.00,0.00,0.00,0.00
1,,,,,33.00,117.00,,,6.00,0.00,0.00,0.00
2,,,,,37.00,-122.00,,,0.00,0.00,0.00,0.00
3,,,,,33.00,117.00,,,0.00,0.00,0.00,0.00
4,,,,,33.00,117.00,,,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...
877,4.00,4.00,2.00,6.00,,,2.00,7.00,,,,
878,,,,,,,,,,,,
879,,,,,,,,,,,,
880,3.00,7.00,2.00,4.00,,,1.00,9.00,,,,


#### Columns to add/change:
* ~geometry column for lat long~
* a2_senate_dist_b to int64
* a2_senatedistc to int64
* a1_locode to int64


#### Columns to maybe add
* a2_county acronym
*

In [126]:
df >> select(_.a2_county)

Unnamed: 0,a2_county
0,Merced
1,Orange
2,San Mateo
3,Orange
4,Orange
...,...
877,Ventura
878,Los Angeles
879,Santa Clara
880,Santa Barbara
