# ATP Data Exploration

In [1]:
import intake

import numpy as np
import pandas as pd

from calitp import to_snakecase

from dla_utils import _dla_utils

from shared_utils import altair_utils, styleguide
from siuba import *



In [2]:
pd.set_option("display.max_columns", 220)

## Reading in w/o utils

In [3]:
main_details = to_snakecase(
    pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/atp/Main Details.xls")
)
project_details = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Project Details.xls"
    )
)

In [4]:
# main_details.info()

In [5]:
# project_details.project_cycle.value_counts()

In [6]:
# project_details>>count(_.project_app_id)>>filter(_.n>1)

In [7]:
# project_details>>group_by(_.project_cycle)>>count(_.project_app_id)>>filter(_.n>1)

* multiple project ids, but no duplicates by project cycle. 

In [8]:
## merging

In [9]:
# df = pd.merge(main_details, project_details, how="outer", on=["project_app_id", "project_cycle"], indicator='matches')

In [10]:
# (df>>filter(_.project_app_id =='1-Mendocino Council of Governments-1')>>select(_.project_app_id,
#                                                                               _.project_cycle,
#                                                                              _.matches,
#                                                                               _.agency_app_num))

In [11]:
# df.info()

### Comparing column names

code help: https://stackoverflow.com/questions/45482755/compare-headers-of-dataframes-in-pandas

In [12]:
# columns in both dfs
main_details.columns.intersection(project_details.columns)

Index(['project_app_id', 'project_cycle', 'awarded'], dtype='object')

In [13]:
# columns in main_details not in project_details
main_details.columns.difference(project_details.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_contact', 'a1_imp_agcy_email',
       'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma', 'a1_imp_agcy_name',
       'a1_imp_agcy_phone', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_letter_of_intent',
       'a1_locode', 'a1_proj_partner_agcy', 'a1_proj_partner_contact',
       'a1_proj_partner_email', 'a1_proj_partner_exists',
       'a1_proj_partner_phone', 'a1_proj_partner_title', 'a2_assem_dist_a',
       'a2_assem_dist_b', 'a2_assem_dist_c', 'a2_congress_dist_a',
       'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_county', 'a2_ct_dist',
       'a2_info_proj_descr', 'a2_info_proj_loc', 'a2_info_proj_name',
       'a2_mop_uza_population', 'a2_mpo', 'a2_output_outcome', 'a2_past_proj',
       'a2_past_proj_qty', 'a2_proj_lat', 'a2_proj_long',
       'a2_proj_scope_summary', 'a2_project_location_map', 'a2_rtpa',
       'a2_senate_dist_a', 'a2_senate_dist_b', 'a2_senatedistc',
       'a3_current

In [14]:
# columns in project_details not in main_details
project_details.columns.difference(main_details.columns)

Index(['a4_act_other_1', 'a4_act_other_1_descr', 'a4_act_other_2',
       'a4_act_other_2_decr', 'a4_after_school', 'a4_bike_classes',
       'a4_bike_gap_pct', 'a4_bike_rodeos', 'a4_bike_train', 'a4_classrooms',
       ...
       'v_other_traffic_calming_imprv_2', 'v_other_traffic_calming_qty_1',
       'v_other_traffic_calming_qty_2', 'v_remove_right_turn_pocket',
       'v_remove_travel_ln', 'v_sig_inter_new_roundabout',
       'v_sig_inter_timing_improv', 'v_speed_feedback_signs',
       'v_un_sig_inter_new_roundabout', 'v_un_sig_inter_new_traf_sig'],
      dtype='object', length=132)

## Reading in w/ utils

In [15]:
import utils

In [16]:
df = utils.read_in_data()

In [17]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20,1,Yes,80,Yes,0,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,0,0,0,1500,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,1500,0,,0,,0,5,0,4,3,0,0,0,0,0,,0,0.0,0,0,0,0,No,No,Yes,,0,0,,0,0,0,0,0,0,0,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50,0,Yes,50,No,0,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,0,0,0,6336,0,,0,0,0,0,0,0,0,,0,,0,0,2,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,0,0,0,0,0,0,,0,,0,38,0,15,16,0,18,3,0,0,,1,6.0,0,18,0,0,Yes,No,No,,0,0,,0,0,8800,0,0,0,0,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50,2,Yes,50,No,0,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,0,0,13752,5748,0,,0,0,0,0,0,0,0,,0,,0,0,0,1,0,,0,0,0,0,,0,,0,0,0,40,2,0,0,0,20,0,0,0,,0,,0,9,0,0,0,0,0,0,0,0,,0,0.0,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,60,0,0,0,Left Turn Arrow,3,Enhanced Crosswalk Unsignalized,3,218,1000,7,0,0,1,0,0,0,,0,0.0,0,7,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,0,5,Yes,100,Yes,0,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,50,0,0,0,0,43,0,0,0,Enhance crosswalk (unsignalized),7,Raised Crosswalk,2,189,3455,5,0,0,1,0,0,0,,0,0.0,2,5,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 211 entries, a1_imp_agcy_city to matches
dtypes: category(1), datetime64[ns](2), float64(23), int64(97), object(88)
memory usage: 1.4+ MB


In [19]:
df.matches.value_counts()

both          882
left_only       0
right_only      0
Name: matches, dtype: int64

In [20]:
df.awarded_y.value_counts()

N    882
Name: awarded_y, dtype: int64

### Comparing merged df with cleaned data

In [21]:
#removing columns with agency staff information from cleaned df
columns_to_drop = [
    "a1_imp_agcy_contact",
    "a1_imp_agcy_email",
    "a1_imp_agcy_phone",
    "a1_proj_partner_contact",
    "a1_proj_partner_email",
    "a1_proj_partner_phone",
]

In [22]:
alldata = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllData",
    )
)

In [23]:
alldata = alldata.drop(columns=columns_to_drop)

In [24]:
alldata.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
146,N,CYCLE 5,6,,,,,Infrastructure - Small,6-California Department of Transportation-7,Ivanhoe Safe Route To School,Tulare,0,California Department of Transportation,1352 W. Olive Avenue,Fresno,93778,Senior Transportation Engineer,No,,,Yes,Tulare County Association of Governments (TCAG),Executive Director TCAG,26,,,22,,,14,,,Construction of pedestrian and bicycle improve...,In Tulare County and the community of Ivanhoe ...,Project is located outside one of the ten larg...,TCAG,No,0,36.39,-119.22,The proposed shared-use path would travel alon...,,,,Yes,,Yes,,Yes,,Yes,Yes,25,1,Yes,75,Yes,0,,0,Yes,No,0,0,Ivanhoe Safe Routes Feasibility Study 2019,The intersection improvements will provide bet...,0,0,0,0,0,,0,0,0,0,0,1,0,Shared use railroad crossing,2,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,100,0,9,0,790,30,Shared use railroad crossing,2,Transit waiting area,2,0,0,0,6,0,0,0,0,0,,0,0,0,6,0,0,No,Yes,Yes,Intersection Improvement,0,1,,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


In [25]:
cleaned = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/atp/Master_AllData_Cycle5FieldMapping.xls",
        sheet_name="AllDataFieldMapping Cleaned",
    )
)

In [26]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_contact,a1_imp_agcy_title,a1_imp_agcy_email,a1_imp_agcy_phone,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_contact,a1_proj_partner_title,a1_proj_partner_email,a1_proj_partner_phone,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,...,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,unnamed:_215,prog__amount__paed__1,prog__amount__pse__1,prog__amount__rw__1,prog__amount__con__1,prog__amount__con_ni__1,unnamed:_221,fund_year_1,prog__amount__paed__2,prog__amount__pse__2,prog__amount__rw__2,prog__amount__con__2,prog__amount__con_ni__2,unnamed:_228,fund_year_2,prog__amount__paed__3,prog__amount__pse__3,prog__amount__rw__3,prog__amount__con__3,prog__amount__con_ni__3,unnamed:_235,fund_year_3,prog__amount__paed__4,prog__amount__pse__4,prog__amount__rw__4,prog__amount__con__4,prog__amount__con_ni__4,unnamed:_242,fund_year_4,unnamed:_244,unnamed:_245,unnamed:_246,unnamed:_247,unnamed:_248,unnamed:_249,unnamed:_250,unnamed:_251,unnamed:_252
207,N,5,8,,,,,Infrastructure - Small,"8-Apple Valley, Town of-1",Yucca Loma Elementary School Safe Routes to Sc...,SBD,5453,"Apple Valley, Town of",14955 Dale Evans Parkway,Apple Valley,92307,Julie Ryan,Special Projects Manager,jryan@applevalley.org,760-240-7000,Yes,00349S,08-5453R,No,,,,,,33,33,,,8,8,,,21,21,,,Install new sidewalk at school neighborhood; i...,Rancherias Road 565 feet south of Yucca Loma R...,Project is located within one of the ten large...,SCAG,Yes,1,34.5,-117.2,Yucca Loma Elementary School Safe Routes to Sc...,,,,No,,No,,No,,Yes,Yes,50,1,Yes,50,Yes,0,,0,Yes,No,0,0,,"Installation of new sidewalk, ADA ramps, and h...",0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,...,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [27]:
# filter_col = [col for col in cleaned if col.startswith('unnamed')]

In [28]:
# remove columns that have some agency specific information
cleaned = cleaned.drop(columns=columns_to_drop)

In [29]:
# remove columns that were manually entered - last 34 columns
cleaned = cleaned.drop(columns=(cleaned.iloc[:, 199:]))

In [30]:
# remove columns that are blank and unnamed
# cleaned=cleaned.drop(columns=filter_col)

In [31]:
# cleaned.columns.get_loc("original_prog__amt___pa_ed_")

In [32]:
# (cleaned.iloc[:, 199:].columns.tolist())

In [33]:
# making sure they are null
# (cleaned.iloc[1:, 199:]).info()

In [34]:
cleaned.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id,ppno,ppno_1,a3_proj_type,project_app_id,a2_info_proj_name,a2_county,a1_locode,a1_imp_agcy_name,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo
255,N,5,8,,,,,Infrastructure - Large,"8-Moreno Valley, City of-1",South City Trail Project,RIV,5441,"Moreno Valley, City of",14177 Frederick Street,Moreno Valley,92553,Capital Projects Division Manager,Yes,00196S,08-5441R,No,,,61,61,,,41,41,,,31,31,,,Construction of a 2-mile multi-use trail paral...,"Located in the City of Moreno Valley, starting...",Project is located within one of the ten large...,SCAG,Yes,1,33.87,-117.23,The South City Trail project in the City of Mo...,,,,No,,Yes,,No,,Yes,Yes,50,6,Yes,50,Yes,0,No,0,Yes,Yes,0,0,"Capital Improvement Plan, Master Plan of Trails",Construction of a 2-mile multi-use trail will ...,0,11000,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,New bike/ped trail as outlined above,0,,0,0,6,50,1,1,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,6,0,,0,0,0,0,0,0,No,Yes,No,,0,0,,0,0,0,0,0,0,0,N,0,N,0,N,0,N,0,,0,N,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No


### How do the merged and cleaned up columns match up?

In [35]:
df.columns.intersection(cleaned.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma',
       'a1_imp_agcy_name', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_proj_partner_agcy',
       'a1_proj_partner_exists',
       ...
       'a4_collab_non_profit', 'a4_collab_schools', 'a4_collab_pub_works',
       'a4_collab_other', 'a4_colab_other_descr', 'a4_plan_ped',
       'a4_plan_bike', 'a4_plan_atp', 'a4_plan_school_routes',
       'a4_row_open_street_demo'],
      dtype='object', length=191)

In [36]:
# columns in df not in cleaned
df.columns.difference(cleaned.columns)

Index(['a1_letter_of_intent', 'agency_app_num', 'app_fk', 'app_pk',
       'attch_addtl_attachments', 'attch_app_sig_page',
       'attch_conditions_photos', 'attch_conditions_project_map',
       'attch_engineeers_checklist', 'attch_exhibit22_plan',
       'attch_letters_of_support', 'attch_link', 'attch_ni_workplan',
       'attch_project_estimate', 'awarded_x', 'awarded_y',
       'completed_pdf_form', 'details_datetime_stamp', 'main_datetime_stamp',
       'matches'],
      dtype='object')

In [37]:
# columns in cleaned not in df
cleaned.columns.difference(df.columns)

Index(['#', 'assembly_district', 'atp_id', 'awarded', 'congressional_district',
       'ppno', 'ppno_1', 'senate_district'],
      dtype='object')

In [38]:
len(cleaned)

454

## Assembly, Congressional and Senate Districts

In [39]:
ad = (
    cleaned
    >> select(
        _.a1_imp_agcy_name,
        _.assembly_district,
        _.a2_assem_dist_a,
        _.a2_assem_dist_b,
        _.a2_assem_dist_c,
        # _.congressional_district,
        # _.a2_congress_dist_a,
        # _.a2_congress_dist_b,
        # _.a2_congress_dist_c,
        # _.senate_district,
        # _.a2_senate_dist_a,
        # _.a2_senate_dist_b,
        # _.a2_senatedistc,
    )
)

In [40]:
ad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   a1_imp_agcy_name   454 non-null    object 
 1   assembly_district  454 non-null    object 
 2   a2_assem_dist_a    454 non-null    int64  
 3   a2_assem_dist_b    62 non-null     float64
 4   a2_assem_dist_c    29 non-null     float64
dtypes: float64(2), int64(1), object(2)
memory usage: 17.9+ KB


### Unsuccessful Methods

In [41]:
## Need to join two columns together if they have values

In [42]:
## code help: https://stackoverflow.com/questions/52889130/how-to-remove-zeros-after-decimal-from-string-remove-all-zero-after-dot
#ad["a2_assem_dist_b"].map("{0:g}".format)

In [43]:
##code help:
# https://stackoverflow.com/questions/49091259/pandas-looping-through-rows-and-skipping-over-rows
# https://stackoverflow.com/questions/36774602/concatenate-two-numerical-values-to-make-a-new-column-using-pandas

In [44]:
# ## Code help: https://stackoverflow.com/questions/56119307/pandas-conditionally-concat-two-columns
# mask = (ad["a2_assem_dist_b"] < 10).fillna(False)

# ad["assem_dist_combined2"] = ad.loc[mask, "a2_assem_dist_b"].map("{0:g}".format) + ad[
#     "a2_assem_dist_c"
# ].map("{0:g}".format)

In [45]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         ad["a2_assem_dist_b"], ad["assem_dist_combined2"])

In [46]:
# ad["assem_dist_combined2"] = np.where(
#         ad.assem_dist_combined2.isnull(),
#         (ad["a2_assem_dist_b"].map("{0:g}".format) + ', ' + ad["a2_assem_dist_c"].map("{0:g}".format)),
#          ad["assem_dist_combined2"])

In [47]:
## another attempt:

## this will combine all 
## code help: https://stackoverflow.com/questions/55526620/how-to-combine-non-null-entries-of-columns-of-a-dataframe-into-a-new-column
#df["assem_dist_combined3"] = df.agg(lambda x: x.dropna().str.cat(sep=','), axis=1)

## this combines set columns
## code help: https://stackoverflow.com/questions/45787782/combine-multiple-columns-in-pandas-excluding-nans
# cols = ['a2_assem_dist_b', 'a2_assem_dist_c']
# ad["assem_dist_combined3"] = ad[cols].agg(lambda x: x.dropna().tolist(), axis=1)


### Function

Requirements for function:
* when `a2_assem_dist_a` == 0 AND `a2_assem_dist_b` & `assem_dist_c` are less than 10, **then combine of `2_assem_dist_b` & `assem_dist_c` to one number.**
* when `a2_assem_dist_a` is less than 10 AND `a2_assem_dist_b` is less than 10 AND `assem_dist_c` is null, **then take combine `2_assem_dist_a` & `assem_dist_b` (can be one number or two)**
* when `a2_assem_dist_a` == 1 AND `a2_assem_dist_b` is less than 10, **then combine `a2_assem_dist_a` & `2_assem_dist_b` with a comma**
* when `a2_assem_dist_a` is notnull AND `a2_assem_dist_b` & `assem_dist_c` are null,** then `ssembly_district`== `2_assem_dist_a`**
* when `a2_assem_dist_a` & `a2_assem_dist_b` is >= 10 AND `assem_dist_c` is null, **then take combination of `2_assem_dist_a` & `assem_dist_b` with a comma**
* when `a2_assem_dist_a` & `a2_assem_dist_c` is >= 10 AND `assem_dist_b` is null, **then take combination of `2_assem_dist_a` & `assem_dist_c` with a comma**


In [48]:
def format_districts(df, col_a, col_b, col_c, new_col):
    
    #rename columns to alias
    df = df.rename(columns = {col_a:'a',
                              col_b:'b',
                              col_c:'c'})
    #fix types
    df = df.astype({'a':'Int64',
                    'b':'Int64',
                    'c':'Int64'})
    
    #replace null values with numeric
    df["a"].fillna(9999999, inplace=True)
    df["b"].fillna(9999999, inplace=True)
    df["c"].fillna(9999999, inplace=True)
    
    def district_status(row):
        if (row.a == 0) and (row.b < 10) and (row.c < 10):
            return (str(row["b"])) + (str(row["c"]))
        
        elif (row.a < 10) and (row.b < 10) and not (row.c == 9999999):
            return (str(row["a"])) + (str(row["b"]))
        
        elif (row.a>=1) and (row.b == 9999999) and (row.c == 9999999):
            return (row["a"])
        
        elif (row.a >= 10) and (row.b>= 10) and (row.c == 9999999):
            return (str(row["a"])) + ', ' + (str(row["b"]))
        
        elif (row.a >= 10) and (row.b == 9999999)  and (row.c >= 10):
            return  (str(row["a"])) + ', ' + (str(row["c"]))
        
        elif (row.a >= 1) and (row.b == 0) and (row.c == 0):
            return  (str(row["a"])) 
        
        elif (row.a >= 1) and not (row.b == 0) and not (row.b == 9999999) and not (row.c == 9999999):
            return  (str(row["a"])) + ', ' + (str(row["b"])) + ', ' + (str(row["c"]))
        
        else:
            return "Needs Manual Assistance"
    
    #apply function
    df[new_col] = df.apply(lambda x: district_status(x), axis=1)
    
    #replace values back to null
    df = df.replace({'a': 9999999, 'b': 9999999, 'c':9999999}, np.nan)
    
    #rename columns back to original
    df = df.rename(columns = {'a':col_a,
                              'b':col_b,
                              'c':col_c})
  
    return df
    

In [94]:
## test on the subsetted df 
## still using cleaned df
(format_districts(ad, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district2")).sample(20)

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district2
229,"Desert Hot Springs, City of",56,56,,,56
262,"Waterford, City of",12,12,,,12
364,"Los Angeles, City of","39, 46",39,46.0,,"39, 46"
251,Riverside County,67,67,,,67
405,"Patterson, City of",21,21,,,21
299,San Joaquin Regional Rail Commission,13,13,,,13
263,"Buellton, City of",37,37,,,37
32,Alameda County,20,20,,,20
366,"Pico Rivera, City of",58,58,,,58
245,"San Diego, City of","77, 78, 79",77,78.0,79.0,"77, 78, 79"


In [50]:
## check which ones dont fit the arguments 
## still using cleaned df

(format_districts(ad, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district2"))>>filter(_.assembly_district2=="Needs Manual Assistance")

Unnamed: 0,a1_imp_agcy_name,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district2
6,Butte County,"1, 3",1,3,,Needs Manual Assistance
12,"San Bernardio, City of",40,4,0,,Needs Manual Assistance
276,Yolo County,"4, 7",4,7,,Needs Manual Assistance
365,"Lynwood, City of","6, 3",6,3,,Needs Manual Assistance
380,"Vallejo, City of","14, 4",14,4,,Needs Manual Assistance


In [51]:
## test on main df with assembly districts 
df = ((format_districts(df, "a2_assem_dist_a", "a2_assem_dist_b", "a2_assem_dist_c", "assembly_district"))
 )
(df>>select(_.a2_assem_dist_a, _.a2_assem_dist_b, _.a2_assem_dist_c, _.assembly_district)).sample(20)

Unnamed: 0,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,assembly_district
574,29,,,29
777,63,,,63
665,55,,,55
16,13,,,13
86,54,,,54
244,9,,,9
804,14,,,14
598,0,4.0,3.0,43
550,24,28.0,,"24, 28"
766,1,2.0,,Needs Manual Assistance


In [52]:
## apply function for the other two columns: congressional district and senate district

df = (format_districts(df, "a2_congress_dist_a", "a2_congress_dist_b", "a2_congress_dist_c", "congressional_district"))
df = (format_districts(df, "a2_senate_dist_a", "a2_senate_dist_b", "a2_senatedistc", "senate_district"))

In [53]:
df.sample(5)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
542,Simi Valley,07-5405F15,Yes,"Simi Valley, City of",00249S,2929 Tapo Canyon Road,Principal Engineer,93063,,,No,,38,,,25,,,Ventura,7,Plan for the development of bicycle facilities...,The plan location is within the approximately ...,Simi Valley Bicycle Master Plan,Project is located within one of the ten large...,SCAG,Yes,4,,,The project will update the 2008 Simi Valley B...,,,27,,,,No,,Yes,,No,,No,Plan,Yes,100,0,No,0,No,0,,0,No,1,3440,BicycleMasterPlan B.pdf,Attachment A for Plan (2).pdf,ATTACHMENT E EXISTING CONDITIONS.pdf,,,,ATP_Cycle6_VCTC_SupportLetter_SimiValley_Green...,,,,,2022-06-15 16:54:24,"7-Simi Valley, City of-1",CYCLE 6,N,5405,No,No,,Plan for the development of bicycle facilities...,Yes,0,9800,0,0,0,,0,0,0,0,0,4,0,Wayfinding/Monument Signs,4,,0,0,0,0,0,,9800,0,1,0,,0,,0,85,14,0,0,0,0,0,0,0,0,0,Signage,1,,0,0,0,0,0,0,0,0,15,4,"24"" Box California pepper",0,,0,0,0,0,Yes,No,No,,0,0,,0,0,0,0,0,0,0,3440,2022-06-14 19:43:48,N,0,N,0,N,0,N,0,,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,38,25,27
724,West Sacramento,03-5447R,Yes,"West Sacramento, City of",000445,1110 West Capitol Avenue,Supervising Transportation Planner,95691,Sacramento_Letter_of_Intent_Resolution.pdf,City of Sacramento,Yes,Senior Engineer,4,6.0,,7,7.0,,Yolo,3,Convert the upper deck of the historic I Stree...,Project spans Sacramento River from 2nd/D Stre...,I Street Bridge Deck Conversion for Active Tra...,Project is located within one of the ten large...,SACOG,Yes,5,,,"The I Street Bridge is a narrow, historic, dou...",,,3,8.0,,,Yes,,Yes,,Yes,,Yes,Infrastructure - Large,Yes,50,0,Yes,50,No,0,,0,No,1,3631,I ST Deck Conversion for Active Transportation...,"Attachment A, signature page_completed.pdf",Attachment E-Photos of Existing Conditions.pdf,Attachment D-Map_Plans Existing_Proposed_Condi...,Attachment-B-Engr-Checklist-Signed.pdf,,Attachment I-Letters of Support & Documentatio...,,,Attachment-F-Project-Estimate_DeckConversion.pdf,,2022-06-15 15:21:14,"3-West Sacramento, City of-1",CYCLE 6,N,5447,0,0,"Parks Master Plan, Mobility Action Plan",Adaptive Reuse of historic infrastructure with...,Yes,0,3115,0,0,1200,,0,0,0,0,0,2,0,Green Cycletrack Lane Striping,2,Bike Route Signs,4,0,0,0,0,,0,0,0,0,,0,,0,0,8,25,0,0,3115,1,4,0,0,3115,Historic/Cultural Interpretive Signs,4,Wayfinding Signs,8,1,1200,0,0,0,0,0,4,10,To Be Determined,0,,0,0,0,0,No,Yes,Yes,,0,0,,0,0,0,0,0,0,0,3631,2022-06-15 15:21:14,N,0,N,0,N,0,N,0,,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,Needs Manual Assistance,Needs Manual Assistance,Needs Manual Assistance
491,San Diego,11-6066R,Yes,San Diego Association of Governments (SANDAG),00037S,"401 B Street, Suite 800",Senior Regional Planner,92101,02_ATP6_Central Ave_Part A1_LOI_COSD.pdf,City of San Diego,Yes,Program Manager,78,79.0,,51,52.0,,San Diego,11,"Construct low-stress bike facilities, cul-de-s...","Located in the City of San Diego, specifically...",Central Avenue Bikeway - The Missing Link,Project is located within one of the ten large...,SANDAG,Yes,7,,,The Central Avenue Bikeway (Project) provides ...,,,39,,,,Yes,,Yes,,No,,No,Infrastructure - Medium,Yes,70,35,Yes,30,Yes,0,,0,No,2,3316,Central Avenue Bikeway TSIA Feb 2018.pdf,ATP6_Central Ave_Attachment-A-Signature-Page_H...,2022.06.10_Central_Existing Proposed Exhibit.pdf,2021.11.23 - Central Final.pdf,2022.06.08_Central_Attachment-B-Engr-Checklist...,,2022.06.09_Letters of Support_Compiled.pdf,,,2022.06.13_Central_Attachment-F-Project-Estima...,,2022-06-13 22:51:06,11-San Diego Association of Governments (SANDA...,CYCLE 6,N,6066,0,0,Caltrans SR15,Construct 2.4 lane-miles of bicycle facilities...,Yes,0,0,0,10298,330,,1,0,0,0,0,0,0,No Right Turn of Red blank out signs,6,Bike Ramps,3,0,0,0,6,,0,0,0,0,,0,,0,0,0,60,0,5,0,0,7,0,269,0,Truncated domes,6,Leading Pedestrian Intervals (LPI),2,4,0,5,1,52,1,5,0,0,,0,,3,2,0,0,No,Yes,No,Cul-de-sac (1) & Speed Cushions (2),0,3,Transit Priority Signal,1,0,335,0,5,0,0,3316,2022-06-13 22:51:06,N,0,N,0,N,0,N,0,,,N,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,"78, 79","51, 52",39
829,Santa Cruz,,Yes,Santa Cruz Health Services Agency,05-6448,1080 Emeline Avenue,Senior Health Educator,95060,,,No,,30,,,20,,,Santa Cruz,5,Provide a variety of SRTS programs to students...,12 Watsonville area K-12th schools in the Paja...,Safe Routes for Watsonville School Families an...,Project is located outside one of the large MP...,AMBAG,Yes,5,,,"For a city of its size, Watsonville’s streets ...",,SCCRTC,17,,,,No,,No,,No,,No,Non-Infrastructure,No,0,12,No,0,Yes,0,,0,No,2,3851,Attachments A-G.pdf,Attachment A Signature Page ATP6 HSA.pdf,Existing Conditions Photos.pdf,,,,HSA ATP6 Letters of Support.pdf,,Attachment-G-Exhibit-25-R-NI-Work-Plan HSA.xlsx,,,2022-06-28 17:51:01,5-Santa Cruz Health Services Agency-2,CYCLE 6,N,6448,Yes,No,,Development of traffic gardens and use of temp...,Yes,0,0,0,0,0,,0,0,0,0,0,0,0,,0,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,0,0,0,0,0,0,,0,,0,0,0,0,No,No,No,,0,0,,0,0,0,0,0,0,0,3851,2022-06-16 08:59:04,N,0,Y,30,Y,70,N,0,,,N,,0,0,12,16,6,0,,8,172,28,20,72,100,0,0,8,96,,0,12,community bike rides.,16,senior traffic safety classes with walking fie...,N,N,Y,Y,N,N,,Spanish,Y,N,Y,Y,Y,N,,N,N,N,N,No,N,both,30,20,17
146,Fresno,,No,California Department of Transportation,,1352 W. Olive Avenue,Senior Transportation Engineer,93778,,Tulare County Association of Governments (TCAG),Yes,Executive Director TCAG,26,,,22,,,Tulare,6,Construction of pedestrian and bicycle improve...,In Tulare County and the community of Ivanhoe ...,Ivanhoe Safe Route To School,Project is located outside one of the ten larg...,TCAG,No,0,36.39,-119.22,The proposed shared-use path would travel alon...,,,14,,,,Yes,,Yes,,Yes,,Yes,Infrastructure - Small,Yes,25,1,Yes,75,Yes,0,,0,No,7,2195,Invanhoe Safe Route to School Flyer PDF.pdf,ATP Cycle 5 Signature Page Ivanhoe.pdf,Ivanhoe Pictures.pdf,Attachment-D-Ivanhoe_Plans.pdf,Attachment-B-Engr-Checklist Ivanhoe.pdf,,Letters of Support ATP Ivanhoe.pdf,,,Attachment-F-Project Estimate Escalated.pdf,,2020-09-15 09:25:42,6-California Department of Transportation-7,CYCLE 5,N,0,0,0,Ivanhoe Safe Routes Feasibility Study 2019,The intersection improvements will provide bet...,Yes,0,0,0,0,0,,0,0,0,0,0,1,0,Shared use railroad crossing,2,,0,0,0,0,0,,0,0,0,0,,0,,0,0,0,100,0,0,100,0,9,0,790,30,Shared use railroad crossing,2,Transit waiting area,2,0,0,0,6,0,0,0,0,0,,0,0.0,0,6,0,0,No,Yes,Yes,Intersection Improvement,0,1,,0,0,0,0,0,0,0,2195,2020-09-15 09:25:42,N,0,N,0,N,0,N,0,,0.0,N,,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,,0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,26,22,14


## Change 0 values to Null

In [54]:
df_zero = df.loc[:, df.eq(0).any()]

# df[, 12:18][df[, 12:18] == 0] <- NA



In [55]:
df_zero_list = df_zero.columns.to_list()

In [56]:
## note: we might want to take out the assembly, congress and senate districts from this list

In [57]:
df_zero_list

['a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_past_proj_qty',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_pct',
 'a3_trail_elig_cost',
 'a3_trail_trans_pct',
 'agency_app_num',
 'b_sig_inter_new_bike_boxes',
 'b_class_1',
 'b_class_2',
 'b_class_3',
 'b_class_4',
 'b_light_intersection',
 'b_mid_block_new_rrfb_signal',
 'b_mid_block_surf_improv',
 'b_bsp_new_bikes',
 'b_bike_new_secured_lockers',
 'b_bike_new_racks',
 'b_bsp_new_station',
 'b_other_bike_improv_qty_1',
 'b_other_bike_improv_qty_2',
 'b_light_rdwy_seg',
 'b_sig_inter_timing_improv',
 'b_un_sig_new_rrfb_signal',
 'b_un_sig_cross_surf_improv',
 'm_cls_1_trails_widen_recon_exist',
 'm_cls_1_trails_new__less_than_9',
 'm_cls_1_trails_new_over_9',
 'm_non_cls_trails_new',
 'm_other_trail_improv_qty_1',
 'm_other_trail_improv_qty_2',
 'm_non_cls_widen_recon_exist',
 'p

In [58]:
#df[df_zero_list] = df[df_zero_list].replace({'0':np.nan, 0:np.nan})

In [59]:
def convert_zeros_to_nan(df):
    df_zero = df.loc[:, df.eq(0).any()]
    df_zero.drop(['a2_assem_dist_b','a2_assem_dist_c', 'a2_congress_dist_b', 'a2_congress_dist_c', 'a2_senate_dist_b', 'a2_senatedistc',
              'a2_past_proj_qty', 'a3_st_num_schools', 'agency_app_num',
             'a3_st_ped_pct', 'a3_trail_trans_pct', 'a4_ped_gap_pct',  'a4_reg_init_pct', 'a4_com_init_pct',
              'a4_safe_route_pct', 'a4_fl_mile_pct', 'a4_emp_based_pct', 'a4_other_ni_pct'
             ], axis=1, inplace=True)
    df_zero_list = df_zero.columns.to_list()
    df[df_zero_list] = df[df_zero_list].replace({'0':np.nan, 0:np.nan})
    
    return df

In [60]:
df = convert_zeros_to_nan(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [61]:
df.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20.0,1,Yes,80,Yes,,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,1500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,6.0,,1500.0,,,,,,5.0,,4.0,3.0,,,,,,,,,,,,,No,No,Yes,,,,,,,,,,,,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,21,16,12
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50.0,0,Yes,50,No,,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,6336.0,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,100,,,,,,,,,,,,,38.0,,15.0,16.0,,18.0,3.0,,,,1.0,6.0,,18.0,,,Yes,No,No,,,,,,,8800.0,,,,,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50.0,2,Yes,50,No,,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,13752.0,5748.0,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,40,2.0,,,,20.0,,,,,,,,9.0,,,,,,,,,,,,,,,,Yes,No,No,,,,,,,,,,,,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,22,14,13
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,60.0,,,,Left Turn Arrow,3.0,Enhanced Crosswalk Unsignalized,3.0,218.0,1000.0,7.0,,,1.0,,,,,,,,7.0,,,Yes,No,No,,,,,,,,,,,,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,43.0,,,,Enhance crosswalk (unsignalized),7.0,Raised Crosswalk,2.0,189.0,3455.0,5.0,,,1.0,,,,,,,2.0,5.0,,,Yes,No,No,,,,,,,,,,,,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,69,46,34


## Null columns

In [62]:
# finding columns with all null and dropping for now. will keep out of script

In [63]:
alldatanull = alldata.columns[alldata.isna().all()].tolist()

In [64]:
alldatanull

['#',
 'atp_id',
 'ppno',
 'ppno_1',
 'a2_project_location_map',
 'a3_plan_active_trans',
 'a3_plan_bicycle',
 'a3_plan_ped',
 'a3_plan_srts',
 'a4_bike_gap_pct',
 'a4_easement_support',
 'a4_emp_based']

In [65]:
alldata = alldata.drop(columns=alldatanull)

In [66]:
dfnull = df.columns[df.isna().all()].tolist()
df = df.drop(columns=dfnull)

In [67]:
dfnull

['a2_project_location_map',
 'a3_plan_active_trans',
 'a3_plan_bicycle',
 'a3_plan_ped',
 'a3_plan_srts',
 'a3_trail_elig_cost',
 'attch_exhibit22_plan',
 'attch_link',
 'completed_pdf_form',
 'a4_bike_gap_pct',
 'a4_easement_support',
 'a4_emp_based',
 'a4_le_methods']

In [68]:
cleanednull = cleaned.columns[cleaned.isna().all()].tolist()
cleaned = cleaned.drop(columns=cleanednull)

## Changing Column Types

In [69]:
df.a2_mpo.value_counts()

SCAG        315
MTC         132
SANDAG       62
Caltrans     52
SACOG        52
SJCOG        38
TCAG         37
AMBAG        34
KCOG         31
COFCG        31
SBCAG        19
SLOCOG       15
BCAG         14
TMPO         13
SRTA         11
StanCOG      11
MCTC          7
KCAG          3
MCAG          3
CVAG          2
Name: a2_mpo, dtype: int64

In [70]:
df.details_datetime_stamp.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: details_datetime_stamp
Non-Null Count  Dtype         
--------------  -----         
882 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 13.8 KB


In [71]:
compare_col = np.where(
    df["main_datetime_stamp"] == df["details_datetime_stamp"], True, False
)
df["compare_datetime"] = compare_col
df.compare_datetime.value_counts()

True     763
False    119
Name: compare_datetime, dtype: int64

In [72]:
# some datetimes are seconds different others are larger
(
    df
    >> filter(_.compare_datetime == False)
    >> select(_.details_datetime_stamp, _.main_datetime_stamp)
)

Unnamed: 0,details_datetime_stamp,main_datetime_stamp
27,2020-09-10 16:23:42,2020-09-10 16:23:41
37,2020-09-11 12:48:11,2020-09-11 12:48:10
55,2020-09-10 14:14:03,2020-09-10 14:14:02
67,2020-09-14 19:20:56,2020-09-14 19:20:55
75,2020-09-11 16:29:11,2020-09-11 16:29:10
...,...,...
869,2022-06-16 12:10:18,2022-06-21 11:28:23
871,2022-06-16 12:12:24,2022-06-16 12:12:23
872,2022-06-16 12:15:24,2022-06-16 12:15:23
874,2022-06-16 10:57:37,2022-06-16 10:57:36


In [73]:
df = df.drop(columns="compare_datetime")

In [74]:
df.sample(2)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district
818,Los Angeles,07-5006F15,Yes,"Los Angeles, City of",00152S,1149 South Broadway 4th Floor,Assistant Director,90015,,,No,,54,59.0,62.0,37,43.0,,Los Angeles,7,11.76 miles of pedestrian and cyclist improvem...,South Los Angeles Community focused along West...,Western Our Way: Walk and Wheel Improvements,Project is located within one of the ten large...,SCAG,Yes,8,,,The project will transform almost 11.76 miles ...,,30,,,No,Yes,No,Yes,Infrastructure - Large,Yes,9.0,15,Yes,91,Yes,,0,No,2,3658,K Additional Attachments v9.pdf,A-Signature-Page (1)_encrypted_.pdf,E Photos of Existing Conditions 220608.pdf,D Project Maps Plans.pdf,B-Engr-Checklist_encrypted_.pdf,I Letters of Support.pdf,G Non-Infrastructure Work Plan.pdf,F Project-Estimate_25PE_15CE 2206134.xlsx,2022-06-22 13:14:32,"7-Los Angeles, City of-2",CYCLE 6,N,5006,No,Yes,"Mobility Plan 2035, Vision Zero Plan, Stress F...",A network of pedestrian and cyclist improvemen...,Yes,16.0,,,76666.0,,,,,,,70.0,,Diagonal Diverters,1.0,Medians limiting vehicular turns,12.0,,16.0,,,,,,,,,,,,,0,,,536.0,,223.0,,,,Curb Extensions,91.0,Pedestrian Hybrid Beacons,12.0,107.0,,,179.0,,,,,270.0,Drought-Tol Deciduous/Evergre,,,,,,,Yes,No,No,Bus Bulb Outs,,25.0,Speed Humps,58.0,,,,2.0,,1.0,3658,2022-06-15 16:00:52,N,0,N,0,N,0,N,0,,N,,0,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,both,"54, 59, 62","37, 43",30
776,Marysville,03-5916F15,Yes,Yuba County,00143S,"915 Eighth Street, Suite 125",Public Works Director,95901,,,No,,3,,,3,,,Yuba,3,"Project includes PAED, PS&E and construction f...",Unincorporated community of East Linda on Albr...,East Linda Comprehensive Safe Routes to School...,Project is located within one of the ten large...,SACOG,No,0,,,The unincorporated community of Linda develope...,,4,,,No,Yes,No,No,Infrastructure + NI - Large,Yes,55.0,1,Yes,45,Yes,,0,No,3,3649,YWA Resolution_Leveraging Funds.pdf,Application Signature Page.pdf,Photos of Existing Conditions - East Linda.pdf,Project Layout Plan East Linda.pdf,Engineer's Checklist.pdf,Letters of Support - East Linda.pdf,Non-Infrastructure Work Plan_East Linda.xlsx,Attachment-F-Project-Estimate_East Linda.xlsx,2022-06-15 15:42:18,3-Yuba County-3,CYCLE 6,N,5916,0,0,"Yuba's 2030 General Plan, SACOG Regional Bike ...","Design and construct 35,750’ of sidewalks, 12,...",Yes,,,12570.0,23180.0,,,,,,,,,Colored pavement at conflict points,6.0,,,,,,,,,,,,,,,,,100,,,,,89.0,,34650.0,,Unsignalized crosswalks,8.0,,,10.0,1100.0,,,,,,,,,,,,,,,Yes,No,No,,,,,,,,,,,,3649,2022-06-15 15:42:18,N,0,Y,10,Y,90,N,0,,N,,0,,,,,,1.0,3.0,,1.0,1.0,1.0,,,,1.0,,1.0,1.0,Community social media campaign on active tran...,,,Y,N,N,Y,N,Y,Flyers distributed to students in class,Spanish,Y,Y,Y,Y,Y,Y,"Yuba Water Agency, which is providing 20.5% of...",N,N,N,N,No,N,both,3,3,4


In [75]:
df.columns.tolist()

['a1_imp_agcy_city',
 'a1_imp_agcy_fed_ma_num',
 'a1_imp_agcy_ma',
 'a1_imp_agcy_name',
 'a1_imp_agcy_state_ma_num',
 'a1_imp_agcy_street',
 'a1_imp_agcy_title',
 'a1_imp_agcy_zip',
 'a1_letter_of_intent',
 'a1_proj_partner_agcy',
 'a1_proj_partner_exists',
 'a1_proj_partner_title',
 'a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_county',
 'a2_ct_dist',
 'a2_info_proj_descr',
 'a2_info_proj_loc',
 'a2_info_proj_name',
 'a2_mop_uza_population',
 'a2_mpo',
 'a2_past_proj',
 'a2_past_proj_qty',
 'a2_proj_lat',
 'a2_proj_long',
 'a2_proj_scope_summary',
 'a2_rtpa',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_plan_active_trans_exists',
 'a3_plan_bicycle_exists',
 'a3_plan_ped_exists',
 'a3_plan_srts_exists',
 'a3_proj_type',
 'a3_st_bicycle_applies',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_applies',
 'a3_st_ped_pct',
 'a3_st_srts',
 'a3_trail_fed_funding',
 'a3_trail_t

### Add Geometry

In [76]:
from dla_utils import _dla_utils
from shared_utils import geography_utils

In [77]:
gdf = geography_utils.create_point_geometry(
    df, longitude_col="a2_proj_long", latitude_col="a2_proj_lat"
)

In [78]:
gdf.sample(1)

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,main_datetime_stamp,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,matches,assembly_district,congressional_district,senate_district,geometry
820,Orange,12-6071R,Yes,Orange County Transportation Authority (OCTA),00267S,550 S. Main Street,Active Transportation Coordinator,92683,,,No,,68,70,67,45,46,40,Orange,12,Non-Infrastructure Next STEP (Safe Travels Edu...,10 public elementary schools in Orange County ...,Next STEP (Safe Travels Education Program),Project is located within one of the ten large...,SCAG,Yes,1,,,Existing Condition \r\rThroughout Orange Count...,,34,36,37,Yes,Yes,No,Yes,Non-Infrastructure,Yes,50.0,1,Yes,50,Yes,,0,No,1,3573,,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,,,Attachment I - Letters of Support - Next STEP ...,Attachment-G-Exhibit-25-R-NI-Work-Plan.xlsx,,2022-06-15 13:13:58,12-Orange County Transportation Authority (OCT...,CYCLE 6,N,6071,0,0,,SRTS activities at 10 schools (serving disadva...,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,No,No,No,,,,,,,,,,,,3573,2022-06-15 13:13:58,Y,25,N,0,Y,75,N,0,,N,,0,10.0,,,,,,10.0,10.0,10.0,5.0,5.0,5.0,5.0,,10.0,,10.0,30.0,SRTS Committee meetings,5.0,Grade-specific lesson plans for teachers,Y,N,Y,Y,Y,N,,TBD; will translate dependent on school demogr...,Y,Y,Y,Y,Y,N,,N,N,N,N,No,N,both,"68, 70, 67","45, 46, 40","34, 36, 37",POINT EMPTY


### Change columns to integers

In [79]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x

In [80]:
columns_to_int = [
    "a1_locode",
    # "a2_senatedistc",
    # "a2_senate_dist_b",
    # "a2_assem_dist_b",
    # "a2_assem_dist_c",
    # "a2_congress_dist_b",
    # "a2_congress_dist_c",
    # "a2_proj_lat",
    # "a2_proj_long",
    # "a2_senate_dist_b",
    # "a2_senatedistc",
    "p_un_sig_inter_new_roundabout",
 #   "a4_emp_based_pct",
#    "a4_le_methods",
    "a4_srts_le",
    "a1_locode",
    "a2_senatedistc",
    "a2_senate_dist_b",
]

In [81]:
# gdf[columns_to_int] = gdf[columns_to_int].apply(get_num)

In [82]:
for col in columns_to_int:
    gdf[col] = gdf[col].apply(get_num)

In [83]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 202 entries, a1_imp_agcy_city to geometry
dtypes: Int64(7), category(1), datetime64[ns](2), float64(83), geometry(1), int64(15), object(93)
memory usage: 1.4+ MB


In [84]:
gdf.select_dtypes("int64")

Unnamed: 0,a1_imp_agcy_zip,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_ct_dist,a2_past_proj_qty,a2_senate_dist_a,a3_st_num_schools,a3_st_ped_pct,a3_trail_trans_pct,agency_app_num,app_pk,a4_ped_gap_pct,app_fk,a4_reg_init_pct,a4_com_init_pct,a4_safe_route_pct,a4_fl_mile_pct,a4_other_ni_pct
0,95340,21,,,16,,,10,0,12,1,80,0,1,1802,0,1802,0,0,0,0,0
1,92702,69,,,46,,,12,2,34,0,50,0,4,1811,100,1811,0,0,0,0,0
2,94044,22,,,14,,,4,0,13,2,50,0,1,1804,40,1804,0,0,0,0,0
3,92702,69,,,46,,,12,2,34,5,100,0,13,1822,50,1822,0,0,0,0,0
4,92702,69,,,46,,,12,4,34,5,100,0,14,1823,50,1823,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,91362,,4,4,,2,6,7,0,,0,5,0,1,3192,5,3192,0,0,0,0,0
878,91733,49,,,32,,,7,5,22,4,25,0,1,3859,0,3859,0,0,0,0,0
879,95113,25,,,19,,,4,1,15,0,40,0,3,3860,2,3860,0,0,0,0,0
880,93101,,3,7,,2,4,5,2,,1,75,0,2,3845,20,3845,50,0,50,0,0


In [85]:
gdf.select_dtypes("object")

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_county,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_proj_scope_summary,a2_rtpa,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans_exists,a3_plan_bicycle_exists,a3_plan_ped_exists,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_ped_applies,a3_st_srts,a3_trail_fed_funding,a3_trails,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,project_app_id,project_cycle,awarded_x,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_other_bike_improv_1,b_other_bike_improv_2,m_other_trail_imprv_1,m_other_trail_imprv_2,p_other_ped_imprv_1,p_other_ped_imprv_2,p_amenities_shade_tree_type,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_other_traffic_calming_imprv_2,a4_reg_init,a4_com_init,a4_safe_route,a4_fl_mile,a4_other_ni,a4_other_ni_descr,a4_act_other_1_descr,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,awarded_y,assembly_district,congressional_district,senate_district
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,,,No,,Merced,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,The Planada Sidewalk Infill Project is located...,,,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,Yes,,No,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,10-Merced County-1,CYCLE 5,N,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,,,,,No,No,Yes,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,21,16,12
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,This project will implement a Class 3 bicycle ...,,,,Yes,Yes,No,Yes,Infrastructure - Medium,Yes,Yes,No,,No,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,"12-Santa Ana, City of-4",CYCLE 5,N,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,,,No,,San Mateo,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,The project will install a combination of Clas...,,,,No,Yes,Yes,No,Infrastructure - Small,Yes,Yes,No,,No,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,"4-Pacifica, City of-1",CYCLE 5,N,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,22,14,13
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,,,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-13",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,Left Turn Arrow,Enhanced Crosswalk Unsignalized,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,,,No,,Orange,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,"This project will be repairing, replacing and ...",,,,Yes,Yes,No,Yes,Infrastructure - Large,No,Yes,Yes,,No,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,"12-Santa Ana, City of-14",CYCLE 5,N,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,Enhance crosswalk (unsignalized),Raised Crosswalk,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,69,46,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Thousand Oaks,07-5292F15,Yes,"Thousand Oaks, City of",00252,2100 Thousand Oaks Blvd.,Transportation Planner,,,No,,Ventura,"Construction funding for Class IV bikelanes, ...","In the City of Thousand Oaks, Lynn Road betwee...",Lynn Road Bike Lanes and Pedestrain Improvements,Project is located within one of the ten large...,SCAG,No,The project is located on 4.5-miles of Lynn R...,,2,7,Yes,No,No,No,Infrastructure - Small,Yes,Yes,No,,No,ATTACHMENT K.pdf,Attachement A Signed.pdf,photoskn.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,"7-Thousand Oaks, City of-1",CYCLE 6,N,5392,No,Yes,Local Road Safety Plan,"265' new sidewalk, 2 rapid flashing beacons, 1...",Yes,,,,,,,,No,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,44,26,27
878,South El Monte,,Yes,"South El Monte, City of",07-5352S21,1415 Santa Anita Avenue,Community Development Director,Partner Agency Letter of Intent.pdf,City of El Monte,Yes,City Engineer,Los Angeles,Construct Class II bike lane segments; install...,Merced Avenue from Garvey Avenue to Fern Stree...,Merced Avenue Greenway,Project is located within one of the ten large...,SCAG,Yes,The project will implement bicyclist/pedestria...,,,,No,Yes,No,No,Infrastructure - Small,Yes,Yes,No,,No,Att K - Support Docs.pdf,Att A - Signature Page.pdf,Att E - Photos of Existing Conditions.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,"7-South El Monte, City of-1",CYCLE 6,N,5352,0,0,,Construct 0.97-mile Class II bike path; 4 enha...,Yes,,,,,,,,Yes,No,No,,,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,49,32,22
879,San Jose,04-5005F15,Yes,"San Jose, City of",00200S,200 E Santa Clara St,Senior Transportation Specialist,,,No,,Santa Clara,This project will decouple 2nd and 3rd street ...,The project is in SoFA arts district in southw...,2nd & 3rd Street De-Coupling and Complete Stre...,Project is located within one of the ten large...,MTC,Yes,"The City of San José, through its Downtown Tra...",,,,No,Yes,No,No,Infrastructure - Large,Yes,Yes,No,,No,attachment k.pdf,Attachment-A-Signature-Page (1)_jr (1).pdf,Attachment_G_Site_Photos.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,"4-San Jose, City of-3",CYCLE 6,N,5005,0,0,"Emerging mobility Action Plan, Carbon Neutral ...",Project constructs approximately 6840 feet of ...,Yes,Bike Ramps,Raised Intersections,,,Fully Bulbed (all 4 corners),,,Yes,No,No,Conversion of 1 to 2 way operation,<---- 0.68 miles,N,N,N,N,N,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,N,25,19,15
880,Santa Barbara,05-5951R,Yes,Santa Barbara County,00100S,123 E. Anapuma St,Alternative Transportation Manager,,,No,,Santa Barbara,"Curb extensions, sidewalks and crosswalks for ...",Unincorporated neighborhood located south of E...,Isla Vista Bike and Pedestrian Improvements Pr...,Project is located outside one of the large MP...,SBCAG,Yes,"Isla Vista is a place like no other. 15,733 pe...",,1,9,Yes,No,No,No,Infrastructure + NI - Medium,Yes,Yes,Yes,,No,,Attachment A_Signature Page - 2022.pdf,Existing Conditions Photos.pdf,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5-Santa Barbara County-2,CYCLE 6,N,5951,0,0,Regional Transportation Plan,"Curb extensions, sidewalks, and bicycle networ...",Yes,Bike left-hand turn lanes,Class 2 conflict / intersection striping,,,,,,No,No,Yes,,,Y,N,Y,N,N,,,,N,N,Y,Y,N,N,,"Spanish, Mandarin",Y,N,Y,N,N,N,,N,N,N,N,No,N,37,24,19


In [86]:
gdf.select_dtypes("float64")

Unnamed: 0,a2_proj_lat,a2_proj_long,a3_st_bicycle_pct,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_qty_1,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_improv_qty_1,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_qty_1,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_2
0,37.29,120.31,20.00,,,,1500.00,,,,,,,,,,,,,,,,,,,,,,,,,,,6.00,,1500.00,,,,5.00,,4.00,3.00,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
1,33.74,117.86,50.00,,,,6336.00,,,,,,,,,,,,2.00,,,,,,,,,,,,,,,,,,,,,38.00,,15.00,16.00,,18.00,3.00,,,1.00,6.00,,18.00,,,,,,,8800.00,,,,,0.00,,,,,,,,,,,,,,,,,,,
2,37.65,-122.49,50.00,,,13752.00,5748.00,,,,,,,,,,,,,1.00,,,,,,,,,,2.00,,,,20.00,,,,,,9.00,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
3,33.71,117.89,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,60.00,,,,3.00,3.00,218.00,1000.00,7.00,,,1.00,,,,,,,7.00,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
4,33.73,117.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43.00,,,,7.00,2.00,189.00,3455.00,5.00,,,1.00,,,,,,2.00,5.00,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,,,95.00,,,,,24820.00,,,,,,,,,,324.00,2.00,2.00,,,,,,,,,,,,,,,,265.00,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
878,,,75.00,,,5100.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.00,23000.00,4.00,,16.00,,,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
879,,,60.00,9.00,,,,6840.00,,,,,,,,8.00,6.00,,,,4.00,,,,,,,,,,,,,,,,,60.00,,48.00,6840.00,8.00,,,2.00,,,,,,,4.00,4.00,6840.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
880,,,25.00,2.00,,1700.00,22410.00,,,,,,,,,10.00,24.00,,,,,,,,,,,,,1.00,,,1.00,110.00,,2820.00,,,,23.00,,,,,,,,,,,,42.00,17.00,2700.00,,,,,,,,,,,2.00,4.00,4.00,,,,,,4.00,2.00,,,,,,,,,


#### Columns to add/change:
* ~geometry column for lat long~
* ~agg senate_dist~
* ~agg congressional_dist~
* ~agg assemb_dist~
* ~a1_locode to int64~
* ~change 0s values in column to null~


#### Columns to maybe add
* a2_county acronym
*

In [87]:
df >> select(_.a2_county)

Unnamed: 0,a2_county
0,Merced
1,Orange
2,San Mateo
3,Orange
4,Orange
...,...
877,Ventura
878,Los Angeles
879,Santa Clara
880,Santa Barbara


## Locode Check

In [88]:
gdf.a1_locode.info()

<class 'pandas.core.series.Series'>
Int64Index: 882 entries, 0 to 881
Series name: a1_locode
Non-Null Count  Dtype 
--------------  ----- 
881 non-null    object
dtypes: object(1)
memory usage: 13.8+ KB


In [89]:
(gdf>>select(_.a1_locode)>>filter(_.a1_locode.isna()))

Unnamed: 0,a1_locode
433,


In [109]:
(gdf.loc[:, gdf.isna().any()])

Unnamed: 0,a1_imp_agcy_fed_ma_num,a1_imp_agcy_state_ma_num,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_info_proj_descr,a2_proj_lat,a2_proj_long,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_st_bicycle_pct,a3_trail_fed_funding,attch_addtl_attachments,attch_conditions_project_map,attch_engineeers_checklist,attch_letters_of_support,attch_ni_workplan,attch_project_estimate,a1_locode,a3_plan_other_desc,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_emp_based_pct,a4_other_ni_descr,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_other_descr,a4_comm_language,a4_colab_other_descr
0,10-5939R,00033S,,,,21,,,16,,,"PA&ED, PS&E, and CON funding for construction ...",37.29,120.31,12,,,20.00,,Planada Sidewalk infill ATP cross section 1.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,Letters of Support.pdf,,Project Estimate.pdf,5939,,No,,,,1500.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.00,,1500.00,,,,,,5.00,,4.00,3.00,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
1,12-5063,00289S,,,,69,,,46,,,Bishop Street Class 3 Bicycle Boulevard with T...,33.74,117.86,34,,,50.00,,Attachment K - Not Applicable.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,5063,,Yes,,,,6336.00,,,,,,,,,,,,,,2.00,,,,,,,,,,,,,,,,,,,,,,,,,38.00,,15.00,16.00,,18.00,3.00,,,,1.00,6.00,,18.00,,,,,,,,,8800.00,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
2,04-5350-F15,,,,,22,,,14,,,CON funding for installing bicycling facilitie...,37.65,-122.49,13,,,50.00,,,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,Letters of Support.pdf,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,5350,,Yes,,,13752.00,5748.00,,,,,,,,,,,,,,,1.00,,,,,,,,,,,,2.00,,,,20.00,,,,,,,,9.00,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
3,12-5063,00289S,,,,69,,,46,,,Pedestrian traffic safety improvements for Jef...,33.71,117.89,34,,,,,Attachment K.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,5063,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,60.00,,,,Left Turn Arrow,3.00,Enhanced Crosswalk Unsignalized,3.00,218.00,1000.00,7.00,,,1.00,,,,,,,,7.00,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
4,12-5063,00289S,,,,69,,,46,,,Pedestrian traffic safety improvements for La...,33.73,117.87,34,,,,,Attachment K.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,Attachment I - Letter of Support.pdf,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,5063,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43.00,,,,Enhance crosswalk (unsignalized),7.00,Raised Crosswalk,2.00,189.00,3455.00,5.00,,,1.00,,,,,,,2.00,5.00,,,,,,,,,,,,,,0.00,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,07-5292F15,00252,,,,,4,4,,2,6,"Construction funding for Class IV bikelanes, ...",,,,2,7,95.00,,ATTACHMENT K.pdf,Lynn Concept Plans.pdf,Attachment-B-Engr-Checklist_Lynn_SB.pdf,Letters of Support.pdf,,Attachment-F-Project-Estimate_Lynn_SB.xlsx,5392,Local Road Safety Plan,Yes,,,,,24820.00,,,,,,,,,,,,324.00,2.00,2.00,,,,,,,,,,,,,,,,,,265.00,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
878,,07-5352S21,Partner Agency Letter of Intent.pdf,City of El Monte,City Engineer,49,,,32,,,Construct Class II bike lane segments; install...,,,22,,,75.00,,Att K - Support Docs.pdf,Att D - Project Plans.pdf,Att B - Eng Checklist.pdf,Att I - Letters of Support.pdf,Att G - Not Applicable.pdf,Att F - Project Estimate.xlsx,5352,,Yes,,,5100.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.00,23000.00,4.00,,16.00,,,,,,,,,4.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
879,04-5005F15,00200S,,,,25,,,19,,,This project will decouple 2nd and 3rd street ...,,,15,,,60.00,,attachment k.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,LOS.pdf,,2nd and 3rd ATP Engineers Estimate_Final.pdf,5005,"Emerging mobility Action Plan, Carbon Neutral ...",Yes,9.00,,,,6840.00,,,,,,,,Bike Ramps,8.00,Raised Intersections,6.00,,,,4.00,,,,,,,,,,,,,,,,,,,Fully Bulbed (all 4 corners),60.00,,,48.00,6840.00,8.00,,,2.00,,,,,,,,4.00,4.00,6840.00,Conversion of 1 to 2 way operation,,,<---- 0.68 miles,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
880,05-5951R,00100S,,,,,3,7,,2,4,"Curb extensions, sidewalks and crosswalks for ...",,,,1,9,25.00,,,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,Attachment I - Letters of Support 2022.pdf,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,5951,Regional Transportation Plan,Yes,2.00,,1700.00,22410.00,,,,,,,,,Bike left-hand turn lanes,10.00,Class 2 conflict / intersection striping,24.00,,,,,,,,,,,,,,,1.00,,,1.00,110.00,,2820.00,,,,,,23.00,,,,,,,,,,,,,42.00,17.00,2700.00,,,,,,,,,,,,,,2.00,4.00,4.00,,,,,,4.00,2.00,,,,,,,,,,,,,"Spanish, Mandarin",


In [110]:
(cleaned[pd.to_numeric(cleaned['a1_locode'], errors='coerce').isnull()])>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,MEN,Round Valley Indians Tribe,
91,Orleans,SIS,Karuk Tribe,
160,Biggs,BUT,"Biggs, City of",
204,Hawaiian Gardens,LA,"Hawaiian Gardens, City of",
269,San Francisco,ALA,Bay Area Toll Authority,
324,Redding,TEH,Department of Transportation,
395,Los Angeles,LA,California Department of Transportation,
421,Fremont,ALA,"Fremont, City of",
439,Salinas,MON,Monterey County,


In [95]:
(gdf[pd.to_numeric(gdf['a1_locode'], errors='coerce').isnull()])>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,Mendocino,Round Valley Indians Tribe,
91,Orleans,Siskiyou,Karuk Tribe,
159,Biggs,Butte,"Biggs, City of",
202,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",
265,San Francisco,Alameda,Bay Area Toll Authority,
319,Redding,Tehama,Department of Transportation,
387,Los Angeles,Los Angeles,California Department of Transportation,
413,Fremont,Alameda,"Fremont, City of",
433,Salinas,Monterey,Monterey County,
636,Nice,Lake,Robinson Rancheria,


In [97]:
gdf>>filter(_.a1_locode.isnull())>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
433,Salinas,Monterey,Monterey County,


In [98]:
gdf>>filter(_.a1_locode==('None'))>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,Mendocino,Round Valley Indians Tribe,
91,Orleans,Siskiyou,Karuk Tribe,
159,Biggs,Butte,"Biggs, City of",
202,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",
265,San Francisco,Alameda,Bay Area Toll Authority,
319,Redding,Tehama,Department of Transportation,
387,Los Angeles,Los Angeles,California Department of Transportation,
413,Fremont,Alameda,"Fremont, City of",
636,Nice,Lake,Robinson Rancheria,
742,Anza,Riverside,Cahuilla Band of Indians,


In [101]:
gdf = gdf.replace({'a1_locode': 'None'}, np.nan)

In [105]:
gdf = gdf.astype({'a1_locode':'Int64'})

In [107]:
gdf>>filter(_.a1_locode.isnull())>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode
28,Covelo,Mendocino,Round Valley Indians Tribe,
91,Orleans,Siskiyou,Karuk Tribe,
159,Biggs,Butte,"Biggs, City of",
202,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",
265,San Francisco,Alameda,Bay Area Toll Authority,
319,Redding,Tehama,Department of Transportation,
387,Los Angeles,Los Angeles,California Department of Transportation,
413,Fremont,Alameda,"Fremont, City of",
433,Salinas,Monterey,Monterey County,
636,Nice,Lake,Robinson Rancheria,


In [92]:
# for the agencies with no locode, we could coerce the errors to return a "None" value

### Connecting DLA Locodes to application data

* to check- if new locodes are added after ATP project selection

In [111]:
locodes = to_snakecase(
    pd.read_excel("gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/locodes_updated7122021.xlsx")
)

In [113]:
locodes.head()

Unnamed: 0,agency_locode,agency_name,district,county_name,rtpa_name,mpo_name,mpo_locode_fads,active_e76s______7_12_2021_
0,6302,Humboldt Bay Harbor Recreation & Conservation ...,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,
1,6330,Willow Creek Community Services District,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,
2,5036,Trinidad,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,
3,5049,Ukiah,1,Mendocino County,Mendocino Council of Governments,NON-MPO,NON-MPO,Yes
4,5082,Willits,1,Mendocino County,Mendocino Council of Governments,NON-MPO,NON-MPO,


In [124]:
def get_num(x):
    try:
        return int(x)
    except Exception:
        try:
            return float(x)
        except Exception:
            return x  


In [130]:
check_locode = gdf>>select(_.a1_imp_agcy_city, _.a2_county, _.a1_imp_agcy_name, _.a1_locode)

In [131]:
check_locode['a1_locode'] = check_locode['a1_locode'].apply(get_num)

In [132]:
(check_locode.merge(locodes, left_on='a1_imp_agcy_name', right_on='agency_name', how = 'outer', indicator = True))._merge.value_counts()

right_only    975
left_only     654
both          228
Name: _merge, dtype: int64

In [147]:
(check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))._merge.value_counts()

both          860
right_only    720
left_only      22
Name: _merge, dtype: int64

In [164]:
(check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))>>filter(_._merge == 'both')

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode,agency_locode,agency_name,district,county_name,rtpa_name,mpo_name,mpo_locode_fads,active_e76s______7_12_2021_,_merge
0,Merced,Merced,Merced County,5939,5939.00,Merced County,10.00,Merced County,Merced County Association of Governments,Merced County Association Of Goverments,MCAG,Yes,both
1,Merced,Merced,Merced County,5939,5939.00,Merced County,10.00,Merced County,Merced County Association of Governments,Merced County Association Of Goverments,MCAG,Yes,both
2,Santa Ana,Orange,"Santa Ana, City of",5063,5063.00,Santa Ana,12.00,Orange County,Orange County Transportation Authority,Southern California Association Of Governments,SCAG,Yes,both
3,Santa Ana,Orange,"Santa Ana, City of",5063,5063.00,Santa Ana,12.00,Orange County,Orange County Transportation Authority,Southern California Association Of Governments,SCAG,Yes,both
4,Santa Ana,Orange,"Santa Ana, City of",5063,5063.00,Santa Ana,12.00,Orange County,Orange County Transportation Authority,Southern California Association Of Governments,SCAG,Yes,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,San Fernando,Los Angeles,"San Fernando, City of",5202,5202.00,San Fernando,7.00,Los Angeles County,Los Angeles County Metropolitan Transportation...,Southern California Association Of Governments,SCAG,Yes,both
878,SANTA MARIA,Santa Barbara,"Santa Maria, City of",5138,5138.00,Santa Maria,5.00,Santa Barbara County,Santa Barbara County Association of Governments,Santa Barbara County Association Of Governments,SBCAG,Yes,both
879,Azusa,Los Angeles,"Azusa, City of",5112,5112.00,Azusa,7.00,Los Angeles County,Los Angeles County Metropolitan Transportation...,Southern California Association Of Governments,SCAG,Yes,both
880,Temecula,Riverside,"Temecula, City of",5459,5459.00,Temecula,8.00,Riverside County,Riverside County Transportation Commission,Southern California Association Of Governments,SCAG,Yes,both


In [159]:
remaining_locodes = ((check_locode.merge(locodes, left_on='a1_locode', right_on='agency_locode', how = 'outer', indicator = True))
    >>filter(_._merge== 'left_only'))

In [161]:
remaining_locodes = remaining_locodes.drop(columns=[
                                                   'agency_name',
                                                   'district',
                                                   'county_name',
                                                   'rtpa_name',
                                                   'mpo_name',
                                                   'mpo_locode_fads',
                                                   'active_e76s______7_12_2021_'])

In [162]:
remaining_locodes

Unnamed: 0,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode,agency_locode,_merge
99,Covelo,Mendocino,Round Valley Indians Tribe,,,left_only
100,Orleans,Siskiyou,Karuk Tribe,,,left_only
101,Biggs,Butte,"Biggs, City of",,,left_only
102,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",,,left_only
103,San Francisco,Alameda,Bay Area Toll Authority,,,left_only
104,Redding,Tehama,Department of Transportation,,,left_only
105,Los Angeles,Los Angeles,California Department of Transportation,,,left_only
106,Fremont,Alameda,"Fremont, City of",,,left_only
107,Salinas,Monterey,Monterey County,,,left_only
108,Nice,Lake,Robinson Rancheria,,,left_only


In [165]:
compare_names = np.where(remaining_locodes["a1_imp_agcy_name"] == locodes["agency_name"], True, False)
remaining_locodes["compare_names"] = compare_names

ValueError: Can only compare identically-labeled Series objects

### Match using Fuzzy Matcher

In [167]:
import fuzzymatcher

In [191]:
matching = fuzzymatcher.fuzzy_left_join(remaining_locodes, locodes, 'a1_imp_agcy_name', 'agency_name')

In [192]:
matching>>arrange(-_.best_match_score)

Unnamed: 0,best_match_score,__id_left,__id_right,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode,agency_locode_left,_merge,agency_locode_right,agency_name,district,county_name,rtpa_name,mpo_name,mpo_locode_fads,active_e76s______7_12_2021_
273,0.41,12_left,390_right,Pomona,Los Angeles,"California State Polytechnic University, Pomon...",,,left_only,6426,California Polytechnic State University,5,San Luis Obispo County,NON-RTPA,San Luis Obispo Council Of Governments,SLOCOG,
53,0.32,3_left,647_right,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",,,left_only,5387,Hawaiian Gardens,7,Los Angeles County,Los Angeles County Metropolitan Transportation...,Southern California Association Of Governments,SCAG,
265,0.25,8_left,445_right,Salinas,Monterey,Monterey County,,,left_only,5944,Monterey County,5,Monterey County,Transportation Agency For Monterey County,Association of Monterey Bay Area Governments,AMBAG,Yes
271,0.16,10_left,875_right,Anza,Riverside,Cahuilla Band of Indians,,,left_only,7501,La Jolla Band of Luiseno Indians,11,San Diego County,San Diego Association of Governments,NON-MPO,NON-MPO,
104,0.15,4_left,323_right,San Francisco,Alameda,Bay Area Toll Authority,,,left_only,6365,San Francisco Bay Area Water Transit Authority,4,San Francisco County,Metropolitan Transportation Commission,Metropolitan Transportation Commission,MTC,
272,0.15,11_left,323_right,San Francisco,Alameda,Bay Area Toll Authority,,,left_only,6365,San Francisco Bay Area Water Transit Authority,4,San Francisco County,Metropolitan Transportation Commission,Metropolitan Transportation Commission,MTC,
162,0.15,6_left,1034_right,Los Angeles,Los Angeles,California Department of Transportation,,,left_only,6281,California Department of Highway Patrol,53,,NON-RTPA,NON-MPO,NON-MPO,
297,0.15,14_left,1034_right,Stockton,Tuolumne,California Department of Transportation,0.0,,left_only,6281,California Department of Highway Patrol,53,,NON-RTPA,NON-MPO,NON-MPO,
349,0.15,15_left,1034_right,Bishop,Inyo,California Department of Transportation,0.0,,left_only,6281,California Department of Highway Patrol,53,,NON-RTPA,NON-MPO,NON-MPO,
401,0.15,16_left,1034_right,Fresno,Tulare,California Department of Transportation,0.0,,left_only,6281,California Department of Highway Patrol,53,,NON-RTPA,NON-MPO,NON-MPO,


### Match using isin

In [193]:
#matching['isin'] = matching.names.isin(matching.names)

In [194]:
matching['isin'] = matching['agency_name'].isin(matching['a1_imp_agcy_name'])

In [195]:
matching

Unnamed: 0,best_match_score,__id_left,__id_right,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode,agency_locode_left,_merge,agency_locode_right,agency_name,district,county_name,rtpa_name,mpo_name,mpo_locode_fads,active_e76s______7_12_2021_,isin
0,0.07,0_left,37_right,Covelo,Mendocino,Round Valley Indians Tribe,,,left_only,7502,Hoopa Valley Tribe,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,,False
1,-0.01,1_left,37_right,Orleans,Siskiyou,Karuk Tribe,,,left_only,7502,Hoopa Valley Tribe,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,,False
2,0.11,2_left,87_right,Biggs,Butte,"Biggs, City of",,,left_only,5128,Biggs,3,Butte County,Butte County Association of Governments,Butte County Association Of Governments,BCAG,Yes,False
53,0.32,3_left,647_right,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",,,left_only,5387,Hawaiian Gardens,7,Los Angeles County,Los Angeles County Metropolitan Transportation...,Southern California Association Of Governments,SCAG,,False
104,0.15,4_left,323_right,San Francisco,Alameda,Bay Area Toll Authority,,,left_only,6365,San Francisco Bay Area Water Transit Authority,4,San Francisco County,Metropolitan Transportation Commission,Metropolitan Transportation Commission,MTC,,False
105,0.05,5_left,1033_right,Redding,Tehama,Department of Transportation,,,left_only,6280,Department Of General Services,53,,NON-RTPA,NON-MPO,NON-MPO,,False
162,0.15,6_left,1034_right,Los Angeles,Los Angeles,California Department of Transportation,,,left_only,6281,California Department of Highway Patrol,53,,NON-RTPA,NON-MPO,NON-MPO,,False
214,0.11,7_left,306_right,Fremont,Alameda,"Fremont, City of",,,left_only,5322,Fremont,4,Alameda County,Metropolitan Transportation Commission,Metropolitan Transportation Commission,MTC,Yes,False
265,0.25,8_left,445_right,Salinas,Monterey,Monterey County,,,left_only,5944,Monterey County,5,Monterey County,Transportation Agency For Monterey County,Association of Monterey Bay Area Governments,AMBAG,Yes,True
269,-0.19,9_left,28_right,Nice,Lake,Robinson Rancheria,,,left_only,6372,Bear River Band of Rohnerville Rancheria,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,,False


### Match using Str Contains
* code help:  https://stackoverflow.com/questions/48631769/pandas-str-contains-search-for-multiple-values-in-a-string-and-print-the-value

In [197]:
# get list of column values from the lOfficial list of Locodes
names = matching['agency_name'].tolist()

In [198]:
pattern = '|'.join(names)

matching['contains'] = matching['a1_imp_agcy_name'].str.contains(pattern, case=False)

In [199]:
matching>>filter(_.contains==True)

Unnamed: 0,best_match_score,__id_left,__id_right,a1_imp_agcy_city,a2_county,a1_imp_agcy_name,a1_locode,agency_locode_left,_merge,agency_locode_right,agency_name,district,county_name,rtpa_name,mpo_name,mpo_locode_fads,active_e76s______7_12_2021_,isin,contains
2,0.11,2_left,87_right,Biggs,Butte,"Biggs, City of",,,left_only,5128,Biggs,3,Butte County,Butte County Association of Governments,Butte County Association Of Governments,BCAG,Yes,False,True
53,0.32,3_left,647_right,Hawaiian Gardens,Los Angeles,"Hawaiian Gardens, City of",,,left_only,5387,Hawaiian Gardens,7,Los Angeles County,Los Angeles County Metropolitan Transportation...,Southern California Association Of Governments,SCAG,,False,True
214,0.11,7_left,306_right,Fremont,Alameda,"Fremont, City of",,,left_only,5322,Fremont,4,Alameda County,Metropolitan Transportation Commission,Metropolitan Transportation Commission,MTC,Yes,False,True
265,0.25,8_left,445_right,Salinas,Monterey,Monterey County,,,left_only,5944,Monterey County,5,Monterey County,Transportation Agency For Monterey County,Association of Monterey Bay Area Governments,AMBAG,Yes,True,True
