# Data Prep for ATP 

**Duplicate notebook using concat method**

* manual cleaning needed outside of notebook for funding data

In [1]:
import intake
import numpy as np
import pandas as pd
from calitp import to_snakecase
from dla_utils import _dla_utils
from IPython.display import HTML, Markdown
from siuba import *

import altair as alt

import data_cleaning



In [2]:
pd.set_option("display.max_columns",500)

* some `a1_imp_agcy_fed_ma_num` are not present 
*  merge on `project_app_id`


* need function for reading in funding data and which projects get selected for funding
* sheets of Master_AllData that we need:
    * Master_Yes
    * Statewide SUR Details (merge with SUR Funding
    * Statewide SUR Funding (merge with SUR Details) 
* using a copy of the data to account for multiple headers

In [3]:
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/atp/'


## Read in Master Data

In [4]:
# reading the clean data (from atp script)
#df = data_cleaning.read_clean_data()

In [5]:
#df.sample()

In [6]:
def read_app_data():
    """
    Function for reading in the application data. Can then merge with funded data
    """
    # identify information columns that we need to drop
    columns_to_drop = ['a1_imp_agcy_contact','a1_imp_agcy_email','a1_imp_agcy_phone',
                      'a1_proj_partner_contact', 'a1_proj_partner_email', 'a1_proj_partner_phone']
    
    #read in data
    df = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Master_AllData_Cycle5_Field_Mapping_COPY.xls',
              sheet_name='Master_Yes',
                                        header=[2]))
    df = df.drop(columns = columns_to_drop)
    
    #drop columns that will contain funding data (this df has the columns but no information for them)
    #we know funding data starts with columns `original_prog__amt___pa_ed_`
    df.drop(df.iloc[:,(df.columns.get_loc('original_prog__amt___pa_ed_')):], inplace=True, axis=1)  
    
    #drop identifier columns that are fully null (these columns are populated in funding data
    df = df.drop(columns={'#', 'atp_id', 'ppno', 'ppno_1'})
    
    return df

In [7]:
## cn use the version provided or the one we cleaned in the cleaning script
master_data2= read_app_data()

In [8]:
master_data =  data_cleaning.read_clean_data()


2022-10-26 15:07:16.712 | INFO     | data_cleaning:export_district_need_assistance:119 - got District matches: There are 25 Legislative District entries needing assistance
2022-10-26 at 15:07:16 | INFO | got District matches: There are 25 Legislative District entries needing assistance


There are 25 Legislative District entries needing assistance


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
2022-10-26 15:07:17.876 | INFO     | data_cleaning:check_counties:182 - got Locode matches: There are 7 Locode Matching entries needing assistance
2022-10-26 at 15:07:17 | INFO | got Locode matches: There are 7 Locode Matching entries needing assistance


There are 7 Locode Matching entries needing assistance


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
2022-10-26 15:07:18.491 | INFO     | data_cleaning:find_potential_locode_matches:238 - got potential Locode matches: There are 8 potential locode matching entries needing assistance
2022-10-26 at 15:07:18 | INFO | got potential Locode matches: There are 8 potential locode matching entries needing assistance


There are 8 potential locode matching entries needing assistance


2022-10-26 15:07:24.425 | INFO     | data_cleaning:clean_data:286 - got clean data: 882 entries
2022-10-26 at 15:07:24 | INFO | got clean data: 882 entries


Data cleaning complete. There are 882 entries in dataframe


In [9]:
master_data.head()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,assembly_district,congressional_district,senate_district,#,atp_id,awarded,ppno,ppno_1,data_origin,geometry
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340,,,No,,21,,,16,,,Merced,10,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20.0,1,Yes,80,Yes,,,0,No,1,1802,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,CYCLE 5,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,1500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,6.0,,1500.0,,,,,,5.0,,4.0,3.0,,,,,,,,,,,,,No,No,Yes,,,,,,,,,,,,1802,2020-06-09 10:33:08,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,21,16,12,,,N,,,Application,POINT (120.31282 37.29159)
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50.0,0,Yes,50,No,,,0,No,4,1811,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",CYCLE 5,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,6336.0,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,100,,,,,,,,,,,,,38.0,,15.0,16.0,,18.0,3.0,,,,1.0,6.0,,18.0,,,Yes,No,No,,,,,,,8800.0,,,,,1811,2020-08-20 18:49:12,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.86443 33.73947)
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044,,,No,,22,,,14,,,San Mateo,4,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50.0,2,Yes,50,No,,,0,No,1,1804,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",CYCLE 5,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,13752.0,5748.0,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,40,2.0,,,,20.0,,,,,,,,9.0,,,,,,,,,,,,,,,,Yes,No,No,,,,,,,,,,,,1804,2020-06-15 11:05:03,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,22,14,13,,,N,,,Application,POINT (-122.49178 37.64730)
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,13,1822,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",CYCLE 5,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,60.0,,,,Left Turn Arrow,3.0,Enhanced Crosswalk Unsignalized,3.0,218.0,1000.0,7.0,,,1.0,,,,,,,,7.0,,,Yes,No,No,,,,,,,,,,,,1822,2020-09-08 10:15:52,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.89494 33.71126)
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702,,,No,,69,,,46,,,Orange,12,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5,Yes,100,Yes,,,0,No,14,1823,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",CYCLE 5,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50,,,,,43.0,,,,Enhance crosswalk (unsignalized),7.0,Raised Crosswalk,2.0,189.0,3455.0,5.0,,,1.0,,,,,,,2.0,5.0,,,Yes,No,No,,,,,,,,,,,,1823,2020-08-31 12:34:31,N,0,N,0,N,0,N,0,,0.0,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.86884 33.73240)


In [10]:
len(list(master_data.columns))

218

## Function to read SUR funding data

In [11]:
def read_SUR_funding_data():
    """
    Function to read in ATP funding data. Function will need to change for future data.
    Notes:
    * `atp_id` columns appear the same but the sur_details has an extra zero in the middle of the string so it would not match
    * `a3_project_type` also is entered differently however, details has more details than the funding sheet. Has information on size of project. can add to new column
    * `a1_imp_agcy_name_x` has manual errors so selecting `a1_imp_agcy_name_y`
    """
    # identify information columns that we need to drop
    columns_to_drop = ['a1_imp_agcy_contact','a1_imp_agcy_email','a1_imp_agcy_phone',
                      'a1_proj_partner_contact', 'a1_proj_partner_email', 'a1_proj_partner_phone']
    #read in SUR details and SUR funding data
    sur_details = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Master_AllData_Cycle5_Field_Mapping_COPY.xls',
              sheet_name='Statewide SUR Details'))
    sur_details = sur_details.drop(columns = columns_to_drop)
    
    sur_funding = to_snakecase(pd.read_excel(f'{GCS_FILE_PATH}Master_AllData_Cycle5_Field_Mapping_COPY.xls',
              sheet_name='Statewide SUR Funding'))
    
    #drop the last few columns of SUR Details that have no funding data entered, but have columns
    sur_details.drop(sur_details.iloc[:,199:], inplace=True, axis=1)
    
    #remove rows with all null values
    cols_to_check = sur_funding.columns
    sur_funding['is_na'] = sur_funding[cols_to_check].isnull().apply(lambda x: all(x), axis=1) 
    sur_funding = sur_funding>>filter(_.is_na==False)
    sur_funding = sur_funding.drop(columns={'is_na'})

    #delete rows identified that are not part of the data (informational cells) or a sum total for all entries
    delete_row = sur_funding[sur_funding["project_cycle"]== 'Added Field not from App'].index
    sur_funding = sur_funding.drop(delete_row)
    
    delete_row = sur_funding[sur_funding["total_project_cost"]== '370,984,000.00'].index
    sur_funding = sur_funding.drop(delete_row)
    
    #merge sur_funding and sur_details
    merge_on = ['project_app_id', 'project_cycle', 'a2_ct_dist', 'a1_locode']
    df = (pd.merge(sur_details, sur_funding, how="outer", on = merge_on, indicator=True))
    
    #keep entries that merge. Right_only rows are misentered and more informational columns  
    df = df>>filter(_._merge=='both')
    
    # filling the null values for some of the duplicate columns
    # manually checking that values are the same as of now- will add function to check when we get the data links
    df['awarded_x'] = df['awarded_x'].fillna(df['awarded_y'])
    df['ppno_y'] = df['ppno_y'].fillna(df['ppno_x'])
    
    #renaming and dropping duplicate columns 
    ## a1_imp_agcy_name_x has manual errors so selecting a1_imp_agcy_name_y
    df = df.rename(columns={'awarded_x':'awarded',
                                'ppno_y':'ppno',
                                'a1_imp_agcy_name_y':'a1_imp_agcy_name',
                                'a2_info_proj_name_y':'a2_info_proj_name'
                               })
    df = df.drop(columns={'awarded_y', 'a1_imp_agcy_name_x', 'a2_info_proj_name_x','ppno_x', '_merge'})
    df["data_origin"]="Funded"
    
    return df

In [12]:
funded = read_SUR_funding_data()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [13]:
len(funded)

49

In [14]:
len(list(funded.columns))

264

In [15]:
funded.head()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id_x,ppno_1,a3_proj_type_x,project_app_id,a2_county,a1_locode,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,a1_imp_agcy_name,a2_info_proj_name,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_,data_origin
0,Y,5.0,5.0,,ATP5-05-001R,,Infrastructure - Small,"5-Santa Barbara, City of-3",SB,5007,630 Garden Street,Santa Barbara,93101.0,Principal Transportation Engineer,Yes,00167S,05-5007F15,No,,,37,0.0,3.0,7.0,24,0.0,2.0,4.0,19,0.0,1.0,9.0,Design and construct buffered bike lanes on De...,On De La Vina Street from Alamar Avenue to Pad...,Project is located outside one of the ten larg...,SBCAG,No,0.0,34.43,-119.72,-Existing Conditions-\r\rDe La Vina Street had...,,,,No,,Yes,,Yes,,No,Yes,50.0,3.0,Yes,50.0,No,0.0,,,Yes,,0,0,,Install .65 miles of new Class II buffered bik...,,,3475.0,4965.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,210.0,,,,,,21.0,,,,,,,,,,,,,6.0,,,X,,,,,,,,,,,,,,N,0.0,N,0.0,N,0.0,N,0.0,,0.0,N,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,Active,R,SMALL URBAN & RURAL SOLICITATION-CYCLE 5,CALTRANS,ATP5-5-001R,3057,,Standard,Infrastructure,Small,1998000.0,"Santa Barbara, City of",Upper De La Vina Street Gap Closure and Safe C...,290000.0,21/22,29000.0,23/24,8000.0,23/24,1671000.0,24/25,,,1998000.0,290000.0,,,,,290000.0,21/22,,,,,,0.0,22/23,,29000.0,8000.0,,,37000.0,23/24,,,,1671000.0,,1671000.0,24/25,1998000.0,YES,YES,290.0,29.0,8.0,1671.0,0.0,1998.0,1998000.0,21/22,23/24,23/24,24/25,,Funded
1,Y,5.0,7.0,,ATP5-07-002S,,Infrastructure - Small,"7-South El Monte, City of-1",LA,5352,1415 Santa Anita Avenue,South El Monte,91733.0,Project Manager,Yes,00054S,07-5352,No,,,57,57.0,,,38,38.0,,,22,22.0,,,This project focuses on school and pedestrian ...,The project is fully in the City of South El M...,Project is located within one of the ten large...,SCAG,No,0.0,34.05,118.05,This project focuses on school and pedestrian ...,,,,No,,No,,No,,Yes,No,0.0,7.0,Yes,100.0,Yes,0.0,,,Yes,,0,0,,Installation of pedestrian safety upgrades at ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,100.0,1.0,,,2.0,,,,,LED Blinding Stop Signs,13.0,Speed Radar Feedback Signs,5.0,,,9.0,,8.0,,,,,,,,8.0,,32.0,,X,,,,,,,,,,,,,,N,0.0,N,0.0,N,0.0,N,0.0,,0.0,N,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-7-002S,5858,,Standard,Infrastructure,Small,1637000.0,"South El Monte, City of",South El Monte Safe Routes to School Pedestria...,10000.0,21/22,130000.0,22/23,,,1497000.0,23/24,,,1637000.0,10000.0,,,,,10000.0,21/22,,130000.0,,,,130000.0,22/23,,,,1497000.0,,1497000.0,23/24,,,,,,0.0,24/25,1637000.0,YES,YES,10.0,130.0,0.0,1497.0,0.0,1637.0,1637000.0,21/22,22/23,,23/24,,Funded
2,Y,5.0,4.0,,ATP5-04-003S,2343B,Infrastructure + NI - Large,"4-Fairfield, City of-1",SOL,5132,1000 Webster Street,Fairfield,94533.0,"Asst Director of PW, City Engineer",Yes,,04-5132R,No,,,11,11.0,,,3,3.0,,,3,3.0,,,Construct Class IV separated bikeways with Cla...,West Texas Street between Beck Avenue and Penn...,Project is located within one of the ten large...,MTC,No,0.0,38.25,-122.06,"Located adjacent to downtown Fairfield, this p...",,,,Yes,,Yes,,Yes,,Yes,Yes,38.0,2.0,Yes,62.0,Yes,0.0,,,Yes,,0,0,Heart of Fairfield Specific Plan,Road diet replacing travel lanes with Class IV...,12.0,,3120.0,,5720.0,,3.0,,,,,,,,,,,4090.0,5.0,,,,,,,,,,,,,,,,,,,3.0,4020.0,,,,,,,27.0,2100.0,14.0,2.0,32.0,2.0,5.0,,,,1.0,,1.0,2.0,4.0,6140.0,,,X,,,,,,,7164.0,,5.0,1.0,,N,0.0,N,0.0,Y,100.0,N,0.0,,0.0,N,,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,4.0,Mobile Bike Repair Events,8.0,Evaluation Time Periods - 2 each hand tallies ...,Y,N,Y,Y,N,N,,,N,N,N,N,N,Y,Solano Transportation Authority (STA) is the C...,N,N,N,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-4-003S,2343A,2343B,Standard,Combined (IF and NI),Large,16922000.0,"Fairfield, City of",West Texas Street Complete Streets Project,,,838000.0,22/23,,,9948000.0,23/24,117000.0,22/23,10903000.0,,,,,,0.0,21/22,,838000.0,,,117000.0,955000.0,22/23,,,,9948000.0,,9948000.0,23/24,,,,,,0.0,24/25,10903000.0,YES,YES,0.0,838.0,0.0,9948.0,117.0,10903.0,10903000.0,,22/23,,23/24,22/23,Funded
3,Y,5.0,5.0,,ATP5-05-004S,3058B,Infrastructure + NI - Large,"5-Santa Cruz, City of-2",SCR,5025,809 Center St,Santa Cruz,95060.0,Senior Engineer,Yes,00244S,05-5025R,No,,,29,29.0,,,"18, 20",18.0,20.0,,17,17.0,,,Construction of .8 miles of Segment 7 of the R...,Adjacent to the Santa Cruz Branch Rail Line be...,Project is located outside one of the ten larg...,AMBAG,Yes,2.0,36.96,-122.03,The project will close a .8 mile gap in the Ra...,,SCCRTC,,Yes,,No,,No,,Yes,Yes,50.0,1.0,Yes,50.0,Yes,0.0,Yes,100.0,Yes,X,No,No,,Construction of .8 miles of Segment 7 of the R...,,,,,,,,,,,,,,,,,,,,,,,,,4172.0,,Wayfinding signage,40.0,Lighting,45.0,,,100.0,,,,,,,,,,,,,,,,,,,,,38.0,Willow and heritage,,,,,,,,,X,,,,,,,,,,,,N,0.0,N,30.0,N,70.0,N,0.0,,0.0,N,,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,24.0,4.0,0.0,24.0,12.0,0.0,0.0,0.0,144.0,0.0,6.0,20.0,Parent education classes on bike and pedestria...,8.0,Group family rides,Y,N,Y,Y,Y,N,,Spanish,Y,Y,Y,Y,Y,N,,N,N,N,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-5-004S,3058A,,Standard,Combined (IF and NI),Large,12030000.0,"Santa Cruz, City of",Santa Cruz Rail Trail Segment 7 Phase 2 Constr...,,,,,,,8634000.0,21/22,550000.0,21/22,9184000.0,,,,8634000.0,550000.0,9184000.0,21/22,,,,,,0.0,22/23,,,,,,0.0,23/24,,,,,,0.0,24/25,9184000.0,YES,YES,0.0,0.0,0.0,8634.0,550.0,9184.0,9184000.0,,,,21/22,21/22,Funded
4,Y,5.0,11.0,,ATP5-11-005S,,Infrastructure + NI - Small,"11-Oceanside, City of-1",SD,5079,300 N Coast Highway,Oceanside,92024.0,Active Transportation and Micromobility Coordi...,Yes,00369S,11-5079R,No,,,76,76.0,,,49,49.0,,,36,36.0,,,The Laurel Elementary SRTS includes infrastruc...,The project is located in the Eastside communi...,Project is located within one of the ten large...,SANDAG,No,0.0,33.21,-117.37,Laurel Elementary Safe Routes to School (SRTS)...,,,,No,,Yes,,Yes,,No,Yes,10.0,1.0,Yes,90.0,Yes,0.0,,,No,,No,No,,Infrastructure improvements of a mini roundabo...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25.0,2.0,,,1.0,29.0,,868.0,,traffic circle and pavement markings,1.0,,,,,,,,,,,,,,1.0,,1.0,12.0,,X,,,traffic calming median on San Diego,,1.0,,,,,,,,,N,0.0,N,0.0,Y,100.0,N,0.0,,0.0,N,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,2.0,1.0,0.0,0.0,2.0,0.0,0.0,12.0,Stakeholder Meetings,2.0,Crossing Guard Trainings,Y,N,Y,Y,Y,N,,Spanish,Y,Y,Y,Y,Y,N,,N,N,N,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-11-005S,1442,,Standard,Combined (IF and NI),Small,1535000.0,"Oceanside, City of",Laurel Elementary Safe Routes to School,160000.0,21/22,160000.0,22/23,,,1075000.0,23/24,127000.0,21/22,1522000.0,160000.0,,,,127000.0,287000.0,21/22,,160000.0,,,,160000.0,22/23,,,,1075000.0,,1075000.0,23/24,,,,,,0.0,24/25,1522000.0,YES,YES,160.0,160.0,0.0,1075.0,127.0,1522.0,1522000.0,21/22,22/23,,23/24,21/22,Funded


In [16]:
import re

In [17]:
dcolumns1 = [col for col in funded.columns if isinstance(col, str) and re.match('.*_x', col)]

In [18]:
dcolumns2 = [col for col in funded.columns if isinstance(col, str) and re.match('.*_y', col)]

In [19]:
dcolumns1

['atp_id_x',
 'a3_proj_type_x',
 'match_total_atp_$_and_total_atp_x_1000',
 'total_atp_x_1000']

In [20]:
dcolumns2

['atp_id_y',
 'a3_proj_type_y',
 'orig__prog__year__pa_ed_',
 'orig__prog__year__ps_e_',
 'orig__prog__year__rw_',
 'orig__prog__year__con_',
 'orig__prog__year__con_ni_',
 'fund_year_1',
 'fund_year_2',
 'fund_year_3',
 'fund_year_4']

In [21]:
funded>>select(_.atp_id_x, _.atp_id_y,
               _.a3_proj_type_x, _.a3_proj_type_y)

Unnamed: 0,atp_id_x,atp_id_y,a3_proj_type_x,a3_proj_type_y
0,ATP5-05-001R,ATP5-5-001R,Infrastructure - Small,Infrastructure
1,ATP5-07-002S,ATP5-7-002S,Infrastructure - Small,Infrastructure
2,ATP5-04-003S,ATP5-4-003S,Infrastructure + NI - Large,Combined (IF and NI)
3,ATP5-05-004S,ATP5-5-004S,Infrastructure + NI - Large,Combined (IF and NI)
4,ATP5-11-005S,ATP5-11-005S,Infrastructure + NI - Small,Combined (IF and NI)
5,ATP5-06-006S,ATP5-6-006S,Infrastructure + NI - Small,Combined (IF and NI)
6,ATP5-10-007S,ATP5-10-007S,Infrastructure - Small,Infrastructure
7,ATP5-05-008S,ATP5-5-008S,Infrastructure - Large,Infrastructure
8,ATP5-02-009R,ATP5-2-009R,Infrastructure - Large,Infrastructure
9,ATP5-06-010S,ATP5-6-010S,Infrastructure + NI - Small,Combined (IF and NI)


## Merging with Master_Data

In [22]:
master_data.sample()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,assembly_district,congressional_district,senate_district,#,atp_id,awarded,ppno,ppno_1,data_origin,geometry
507,Alhambra,07-6303R,Yes,San Gabriel Valley Council of Governments,00363S,"1000 South Fremont Avenue, Unit 42",Director of Capital Projects/Chief Engineer,91803,,,No,,58,,,38,,,Los Angeles,7,Upgraded railroad crossing signals and signage...,Two Union Pacific Railroad (UPRR)/Metrolink co...,Montebello Railroad Safety Crossings Improvements,Project is located within one of the ten large...,SCAG,No,0,,,The city of Montebello contains many disadvant...,,,32,,,,Yes,,No,,No,,No,Infrastructure - Medium,Yes,35.0,3,Yes,65,Yes,,,0,No,2,3386,Attachment_K-Warrant Exception.pdf,Attachment_A-CEO_Signature.pdf,Attachment_E-Current Conditions.pdf,Attachment_D-Vail&Greenwood_Final Plans.pdf,Attachment_B-Engr-Checklist.pdf,,Attachment_I-LettersOfSupport.pdf,,,Attachment_F-Project-Estimate-v3.pdf,,2022-06-14 15:04:08,7-San Gabriel Valley Council of Governments-2,CYCLE 6,6303,0,0,City of Montebello Complete Streets Policy,Upgraded railroad crossing signals and signage...,Yes,4.0,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,0,,2.0,,,6.0,,,,Ped Gates,6.0,,,,465.0,,4.0,,,,,,,6.0,,,,,,No,Yes,No,,,,,,,,,,,,3386,2022-06-14 15:04:08,N,0,N,0,N,0,N,0,,,N,,0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,58,38,32,,,N,,,Application,POINT EMPTY


In [23]:
funded.sample()

Unnamed: 0,awarded,project_cycle,a2_ct_dist,#,atp_id_x,ppno_1,a3_proj_type_x,project_app_id,a2_county,a1_locode,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,a1_imp_agcy_name,a2_info_proj_name,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_,data_origin
23,Y,5.0,11.0,,ATP5-11-024S,,Infrastructure - Small,"11-National City, City of-3",SD,5066,1243 National City Boulevard,National City,91950.0,Director of Public Works/City Engineer,Yes,00013S,11-5066R,No,,,80,80.0,,,51,51.0,,,40,40.0,,,"Construct Class IV and II bicycle facilities, ...","National City, CA. Highland Avenue from 30th ...",Project is located within one of the ten large...,SANDAG,Yes,1.0,32.66,32.66,The Highland Avenue Inter-City Bike Connection...,,,,Yes,,No,,No,,No,Yes,85.0,0.0,Yes,15.0,No,0.0,,,Yes,,0,0,,Addition of bicycle facilities on Highland Ave...,1.0,,1100.0,,2800.0,,,,,,,,,Intersection conflict striping,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13.0,,2.0,6.0,,,,,,,,,,,,,,,,Narrow travel lanes,,2.0,,,,,,,,,N,0.0,N,0.0,N,0.0,N,0.0,,0.0,N,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-11-024S,1441,,Standard,Infrastructure,Small,1897000.0,"National City, City of",Highland Avenue Inter-City Bike Connection,58000.0,21/22,260000.0,22/23,,,1025000.0,23/24,,,1343000.0,58000.0,,,,,58000.0,21/22,,260000.0,,,,260000.0,22/23,,,,1025000.0,,1025000.0,23/24,,,,,,0.0,24/25,1343000.0,YES,YES,58.0,260.0,0.0,1025.0,0.0,1343.0,1343000.0,21/22,22/23,,23/24,,Funded


In [24]:
master_data.loc[master_data['project_cycle'] == 'CYCLE 5', 'project_cycle'] = 5
master_data.loc[master_data['project_cycle'] == 'CYCLE 6', 'project_cycle'] = 6

In [25]:
master_data[['a2_ct_dist','project_cycle']] = master_data[['a2_ct_dist','project_cycle']].astype(int)

In [26]:
master_data[['a1_locode']] = master_data[['a1_locode']].astype(object)

In [27]:
subset = master_data>>select(_.project_app_id, _.project_cycle, _.a2_ct_dist, _.a1_locode)

In [28]:
subset.project_cycle.value_counts()

5    448
6    434
Name: project_cycle, dtype: int64

In [29]:
subset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 881
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   project_app_id  882 non-null    object
 1   project_cycle   882 non-null    int64 
 2   a2_ct_dist      882 non-null    int64 
 3   a1_locode       868 non-null    object
dtypes: int64(2), object(2)
memory usage: 34.5+ KB


In [30]:
(master_data2>>select(_.project_app_id, _.project_cycle, _.a2_ct_dist, _.a1_locode)).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 454 entries, 0 to 453
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   project_app_id  454 non-null    object
 1   project_cycle   454 non-null    int64 
 2   a2_ct_dist      454 non-null    int64 
 3   a1_locode       453 non-null    object
dtypes: int64(2), object(2)
memory usage: 14.3+ KB


In [31]:
(funded.iloc[:,0:20]).info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 49 entries, 0 to 48
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   awarded                   49 non-null     object 
 1   project_cycle             49 non-null     object 
 2   a2_ct_dist                49 non-null     float64
 3   #                         0 non-null      float64
 4   atp_id_x                  49 non-null     object 
 5   ppno_1                    8 non-null      object 
 6   a3_proj_type_x            49 non-null     object 
 7   project_app_id            49 non-null     object 
 8   a2_county                 49 non-null     object 
 9   a1_locode                 49 non-null     object 
 10  a1_imp_agcy_street        49 non-null     object 
 11  a1_imp_agcy_city          49 non-null     object 
 12  a1_imp_agcy_zip           49 non-null     float64
 13  a1_imp_agcy_title         49 non-null     object 
 14  a1_imp_agcy_

In [32]:
merge_on = ['project_app_id', 'project_cycle', 'a2_ct_dist', 'a1_locode']

In [33]:
#pd.concat([subset,funded])

In [34]:
c1 = subset.select_dtypes(np.float).columns
c1

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Index([], dtype='object')

In [35]:
# will have suplicates for atp_id_x a3_proj_type_x
(pd.merge(subset, funded, how='outer', on= merge_on, indicator='full_merge')).head()



Unnamed: 0,project_app_id,project_cycle,a2_ct_dist,a1_locode,awarded,#,atp_id_x,ppno_1,a3_proj_type_x,a2_county,a1_imp_agcy_street,a1_imp_agcy_city,a1_imp_agcy_zip,a1_imp_agcy_title,a1_imp_agcy_ma,a1_imp_agcy_state_ma_num,a1_imp_agcy_fed_ma_num,a1_proj_partner_exists,a1_proj_partner_agcy,a1_proj_partner_title,assembly_district,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,congressional_district,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,senate_district,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a2_info_proj_descr,a2_info_proj_loc,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_current_plan,a3_trails,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,a1_imp_agcy_name,a2_info_proj_name,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_,data_origin,full_merge
0,10-Merced County-1,5.0,10.0,5939,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1,"12-Santa Ana, City of-4",5.0,12.0,5063,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
2,"4-Pacifica, City of-1",5.0,4.0,5350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
3,"12-Santa Ana, City of-13",5.0,12.0,5063,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
4,"12-Santa Ana, City of-14",5.0,12.0,5063,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only


In [36]:
(pd.merge(subset, funded, how='outer', on= merge_on, indicator='full_merge')).full_merge.value_counts()



left_only     882
right_only     49
both            0
Name: full_merge, dtype: int64

In [37]:
(pd.merge(subset, funded, how='outer', on= merge_on, indicator='full_merge')).info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 931 entries, 0 to 930
Columns: 265 entries, project_app_id to full_merge
dtypes: category(1), float64(155), object(109)
memory usage: 1.9+ MB




In [38]:
# subset data in app data (will concat data later) and keep full funded columns

### test using merge

In [39]:
#(pd.merge(master_data, funded, how='outer', on= merge_on, indicator='full_merge')).head()

In [40]:
#(pd.merge(master_data, funded, how='outer', on=merge_on2, indicator='full_merge')).full_merge.value_counts()

In [41]:
master_data.columns.intersection(funded.columns)

Index(['a1_imp_agcy_city', 'a1_imp_agcy_fed_ma_num', 'a1_imp_agcy_ma',
       'a1_imp_agcy_name', 'a1_imp_agcy_state_ma_num', 'a1_imp_agcy_street',
       'a1_imp_agcy_title', 'a1_imp_agcy_zip', 'a1_proj_partner_agcy',
       'a1_proj_partner_exists',
       ...
       'a4_plan_school_routes', 'a4_row_open_street_demo', 'assembly_district',
       'congressional_district', 'senate_district', '#', 'awarded', 'ppno',
       'ppno_1', 'data_origin'],
      dtype='object', length=195)

In [42]:
master_data.columns.difference(funded.columns)

Index(['a1_letter_of_intent', 'a3_proj_type', 'a4_row_100', 'a4_row_gov_ease',
       'a4_row_private_ease', 'agency_app_num', 'app_fk', 'app_pk', 'atp_id',
       'attch_addtl_attachments', 'attch_app_sig_page',
       'attch_conditions_photos', 'attch_conditions_project_map',
       'attch_engineeers_checklist', 'attch_exhibit22_plan',
       'attch_letters_of_support', 'attch_link', 'attch_ni_workplan',
       'attch_project_estimate', 'completed_pdf_form',
       'details_datetime_stamp', 'geometry', 'main_datetime_stamp'],
      dtype='object')

In [43]:
funded.columns.difference(master_data.columns)

Index(['_2122_con', '_2122_con_ni', '_2122_pa_ed', '_2122_ps_e', '_2122_rw',
       '_2122_total', '_2223_con', '_2223_con_ni', '_2223_pa_ed', '_2223_ps_e',
       '_2223_rw', '_2223_total', '_2324_con', '_2324_con_ni', '_2324_pa_ed',
       '_2324_ps_e', '_2324_rw', '_2324_total', '_2425_con', '_2425_con_ni',
       '_2425_pa_ed', '_2425_ps_e', '_2425_rw', '_2425_total',
       '_require_rw_easement_from_private', 'a3_proj_type_x', 'a3_proj_type_y',
       'agency_fully_own_r_w', 'atp_id_x', 'atp_id_y', 'con', 'con_ni',
       'fund_year_1', 'fund_year_2', 'fund_year_3', 'fund_year_4',
       'match_total_atp_$_and_total', 'match_total_atp_$_and_total_atp_x_1000',
       'orig__prog__year__con_', 'orig__prog__year__con_ni_',
       'orig__prog__year__pa_ed_', 'orig__prog__year__ps_e_',
       'orig__prog__year__rw_', 'original_prog__amt___con_ni_',
       'original_prog__amt___pa_ed_', 'original_prog__amt___ps_e_',
       'original_prog__amt___rw_', 'orignal_prog__amt___con_', 'oversi

In [44]:
# # merging on all common columns does not work
# merge_on3 = list(master_data.columns.intersection(funded.columns))

* merging on [`project_app_id`, `project_cycle`, `a2_ct_dist`, `a1_locode`] 

In [45]:
merge_on3 = ['project_app_id', 'project_cycle', 'a2_ct_dist', 
             # 'a1_locode', 'a2_county', 'a2_info_proj_name',
             # 'a1_imp_agcy_name', 
              # 'a1_imp_agcy_street', 'a1_imp_agcy_city',
 # 'a1_imp_agcy_zip', 'a1_imp_agcy_title', 'a1_imp_agcy_ma', 'a1_imp_agcy_state_ma_num',
 # 'a1_imp_agcy_fed_ma_num', 'a1_proj_partner_exists', 'a1_proj_partner_agcy', 'a1_proj_partner_title',
 #'assembly_district', 'a2_assem_dist_a', 'a2_assem_dist_b', 'a2_assem_dist_c',
 #'congressional_district', 'a2_congress_dist_a', 'a2_congress_dist_b', 'a2_congress_dist_c', 'senate_district',
 #'a2_senate_dist_a', 'a2_senate_dist_b', 'a2_senatedistc', 'a2_info_proj_loc',
  # 'a2_mop_uza_population', 'a2_mpo', 'a2_past_proj', 'a2_past_proj_qty',
  # 'a2_proj_lat', 'a2_proj_long', 'a2_proj_scope_summary',
  # 'a2_project_location_map', 'a2_rtpa', 'a3_plan_active_trans', 'a3_plan_active_trans_exists',
  # 'a3_plan_bicycle', 'a3_plan_bicycle_exists', 'a3_plan_ped', 'a3_plan_ped_exists',
  # 'a3_plan_srts', 'a3_plan_srts_exists', 'a3_st_bicycle_applies',
  # 'a3_st_bicycle_pct', 'a3_st_num_schools', 'a3_st_ped_applies',
   # 'a3_st_ped_pct', 'a3_st_srts', 'a3_trail_elig_cost', 'a3_trail_fed_funding',
]

In [46]:
#(master_data2[master_data2.columns.intersection(merge_on3)]).info()

In [47]:
#(master_data[master_data.columns.intersection(merge_on3)]).info()

In [48]:
dfall = (pd.merge(master_data, funded, how='outer', on=merge_on3, indicator='full_merge'))



In [49]:
dfall.full_merge.value_counts()

left_only     835
both           47
right_only      2
Name: full_merge, dtype: int64

In [50]:
compare_entries = np.where(dfall["a1_imp_agcy_name_x"] == dfall["a1_imp_agcy_name_y"], True, False)
dfall["compare_desc"] = compare_entries



In [51]:
dfall.compare_desc.value_counts()

False    838
True      46
Name: compare_desc, dtype: int64

In [104]:
dfall>>filter(_.compare_desc==False)>>select(_.a1_imp_agcy_name_x,  _.a1_imp_agcy_name_y)>>arrange(_.a1_imp_agcy_name_y)

Unnamed: 0,a1_imp_agcy_name_x,a1_imp_agcy_name_y
882,,"Maywood, City of"
883,,"Oakland, City of"
33,"South El Monte, City of-4","South El Monte, City of"
0,Merced County,
1,"Santa Ana, City of",
...,...,...
877,"Thousand Oaks, City of",
878,"South El Monte, City of",
879,"San Jose, City of",
880,Santa Barbara County,


In [53]:
dfall>>filter(_.full_merge=='right_only')

Unnamed: 0,a1_imp_agcy_city_x,a1_imp_agcy_fed_ma_num_x,a1_imp_agcy_ma_x,a1_imp_agcy_name_x,a1_imp_agcy_state_ma_num_x,a1_imp_agcy_street_x,a1_imp_agcy_title_x,a1_imp_agcy_zip_x,a1_letter_of_intent,a1_proj_partner_agcy_x,a1_proj_partner_exists_x,a1_proj_partner_title_x,a2_assem_dist_a_x,a2_assem_dist_b_x,a2_assem_dist_c_x,a2_congress_dist_a_x,a2_congress_dist_b_x,a2_congress_dist_c_x,a2_county_x,a2_ct_dist,a2_info_proj_descr_x,a2_info_proj_loc_x,a2_info_proj_name_x,a2_mop_uza_population_x,a2_mpo_x,a2_past_proj_x,a2_past_proj_qty_x,a2_proj_lat_x,a2_proj_long_x,a2_proj_scope_summary_x,a2_project_location_map_x,a2_rtpa_x,a2_senate_dist_a_x,a2_senate_dist_b_x,a2_senatedistc_x,a3_plan_active_trans_x,a3_plan_active_trans_exists_x,a3_plan_bicycle_x,a3_plan_bicycle_exists_x,a3_plan_ped_x,a3_plan_ped_exists_x,a3_plan_srts_x,a3_plan_srts_exists_x,a3_proj_type,a3_st_bicycle_applies_x,a3_st_bicycle_pct_x,a3_st_num_schools_x,a3_st_ped_applies_x,a3_st_ped_pct_x,a3_st_srts_x,a3_trail_elig_cost_x,a3_trail_fed_funding_x,a3_trail_trans_pct_x,a3_trails_x,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode_x,a3_plan_none_x,a3_plan_other_x,a3_plan_other_desc_x,a2_output_outcome_x,a3_current_plan_x,b_sig_inter_new_bike_boxes_x,b_class_1_x,b_class_2_x,b_class_3_x,b_class_4_x,a4_bike_gap_pct_x,b_light_intersection_x,b_mid_block_new_rrfb_signal_x,b_mid_block_surf_improv_x,b_bsp_new_bikes_x,b_bike_new_secured_lockers_x,b_bike_new_racks_x,b_bsp_new_station_x,b_other_bike_improv_1_x,b_other_bike_improv_qty_1_x,b_other_bike_improv_2_x,b_other_bike_improv_qty_2_x,b_light_rdwy_seg_x,b_sig_inter_timing_improv_x,b_un_sig_new_rrfb_signal_x,b_un_sig_cross_surf_improv_x,a4_easement_support_x,m_cls_1_trails_widen_recon_exist_x,m_cls_1_trails_new__less_than_9_x,m_cls_1_trails_new_over_9_x,m_non_cls_trails_new_x,m_other_trail_imprv_1_x,m_other_trail_improv_qty_1_x,m_other_trail_imprv_2_x,m_other_trail_improv_qty_2_x,m_non_cls_widen_recon_exist_x,p_amenities_bench_x,a4_ped_gap_pct_x,p_mid_block_cross_new_rrfb_signal_x,p_light_intersection_x,p_lighting_rdwy_seg_x,p_mid_block_cross_surf_improv_x,p_new_ada_ramp_x,p_sidewlks_new_barrier_protect_x,p_sidewlks_new_4_to_8_x,p_sidewlks_new_over_8_x,p_other_ped_imprv_1_x,p_other_ped_qty_1_x,p_other_ped_imprv_2_x,p_other_ped_qty_2_x,p_reconstruct_ramp_to_ada_stand_x,p_sidewlks_reconstruct_enhance_exist_x,p_sig_inter_enhance_exist_crosswlk_x,p_sig_inter_new_crosswlk_x,p_sig_inter_ped_heads_x,p_sig_inter_shorten_cross_x,p_sig_inter_timing_improv_x,p_amenities_trash_can_x,p_amenities_shade_tree_x,p_amenities_shade_tree_type_x,p_un_sig_inter_new_traff_sig_x,p_un_sig_inter_new_roundabout_x,p_un_sig_inter_new_rrfb_sig_x,p_un_sig_inter_shorten_cross_x,p_un_sig_inter_cross_surface_improv_x,p_sidewlks_widen_existing_x,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1_x,v_speed_feedback_signs_x,v_other_traffic_calming_qty_1_x,v_other_traffic_calming_imprv_2_x,v_other_traffic_calming_qty_2_x,v_remove_right_turn_pocket_x,v_remove_travel_ln_x,v_sig_inter_new_roundabout_x,v_sig_inter_timing_improv_x,v_un_sig_inter_new_traf_sig_x,v_un_sig_inter_new_roundabout_x,app_fk,details_datetime_stamp,a4_reg_init_x,a4_reg_init_pct_x,a4_com_init_x,a4_com_init_pct_x,a4_safe_route_x,a4_safe_route_pct_x,a4_fl_mile_x,a4_fl_mile_pct_x,a4_emp_based_x,a4_emp_based_pct_x,a4_other_ni_x,a4_other_ni_descr_x,a4_other_ni_pct_x,a4_wb_audits_x,a4_bike_classes_x,a4_ped_classes_x,a4_demo_events_x,a4_com_enc_x,a4_le_methods_x,a4_com_meetings_x,a4_classrooms_x,a4_school_assem_x,a4_after_school_x,a4_bike_rodeos_x,a4_mock_cities_x,a4_walk_bus_x,a4_bike_train_x,a4_com_challenges_x,a4_srts_enc_x,a4_srts_le_x,a4_srts_training_x,a4_act_other_1_x,a4_act_other_1_descr_x,a4_act_other_2_x,a4_act_other_2_decr_x,a4_comm_trad_media_x,a4_comm_large_media_x,a4_comm_print_x,a4_comm_social_x,a4_comm_web_x,a4_comm_other_x,a4_comm_other_descr_x,a4_comm_language_x,a4_collab_pub_health_x,a4_collab_le_x,a4_collab_non_profit_x,a4_collab_schools_x,a4_collab_pub_works_x,a4_collab_other_x,a4_colab_other_descr_x,a4_plan_ped_x,a4_plan_bike_x,a4_plan_atp_x,a4_plan_school_routes_x,a4_row_open_street_demo_x,assembly_district_x,congressional_district_x,senate_district_x,#_x,atp_id,awarded_x,ppno_x,ppno_1_x,data_origin_x,geometry,awarded_y,#_y,atp_id_x,ppno_1_y,a3_proj_type_x,a2_county_y,a1_locode_y,a1_imp_agcy_street_y,a1_imp_agcy_city_y,a1_imp_agcy_zip_y,a1_imp_agcy_title_y,a1_imp_agcy_ma_y,a1_imp_agcy_state_ma_num_y,a1_imp_agcy_fed_ma_num_y,a1_proj_partner_exists_y,a1_proj_partner_agcy_y,a1_proj_partner_title_y,assembly_district_y,a2_assem_dist_a_y,a2_assem_dist_b_y,a2_assem_dist_c_y,congressional_district_y,a2_congress_dist_a_y,a2_congress_dist_b_y,a2_congress_dist_c_y,senate_district_y,a2_senate_dist_a_y,a2_senate_dist_b_y,a2_senatedistc_y,a2_info_proj_descr_y,a2_info_proj_loc_y,a2_mop_uza_population_y,a2_mpo_y,a2_past_proj_y,a2_past_proj_qty_y,a2_proj_lat_y,a2_proj_long_y,a2_proj_scope_summary_y,a2_project_location_map_y,a2_rtpa_y,a3_plan_active_trans_y,a3_plan_active_trans_exists_y,a3_plan_bicycle_y,a3_plan_bicycle_exists_y,a3_plan_ped_y,a3_plan_ped_exists_y,a3_plan_srts_y,a3_plan_srts_exists_y,a3_st_bicycle_applies_y,a3_st_bicycle_pct_y,a3_st_num_schools_y,a3_st_ped_applies_y,a3_st_ped_pct_y,a3_st_srts_y,a3_trail_elig_cost_y,a3_trail_fed_funding_y,a3_trail_trans_pct_y,a3_current_plan_y,a3_trails_y,a3_plan_none_y,a3_plan_other_y,a3_plan_other_desc_y,a2_output_outcome_y,b_sig_inter_new_bike_boxes_y,b_class_1_y,b_class_2_y,b_class_3_y,b_class_4_y,a4_bike_gap_pct_y,b_light_intersection_y,b_mid_block_new_rrfb_signal_y,b_mid_block_surf_improv_y,b_bsp_new_bikes_y,b_bike_new_secured_lockers_y,b_bike_new_racks_y,b_bsp_new_station_y,b_other_bike_improv_1_y,b_other_bike_improv_qty_1_y,b_other_bike_improv_2_y,b_other_bike_improv_qty_2_y,b_light_rdwy_seg_y,b_sig_inter_timing_improv_y,b_un_sig_new_rrfb_signal_y,b_un_sig_cross_surf_improv_y,a4_easement_support_y,m_cls_1_trails_widen_recon_exist_y,m_cls_1_trails_new__less_than_9_y,m_cls_1_trails_new_over_9_y,m_non_cls_trails_new_y,m_other_trail_imprv_1_y,m_other_trail_improv_qty_1_y,m_other_trail_imprv_2_y,m_other_trail_improv_qty_2_y,m_non_cls_widen_recon_exist_y,p_amenities_bench_y,a4_ped_gap_pct_y,p_mid_block_cross_new_rrfb_signal_y,p_light_intersection_y,p_lighting_rdwy_seg_y,p_mid_block_cross_surf_improv_y,p_new_ada_ramp_y,p_sidewlks_new_barrier_protect_y,p_sidewlks_new_4_to_8_y,p_sidewlks_new_over_8_y,p_other_ped_imprv_1_y,p_other_ped_qty_1_y,p_other_ped_imprv_2_y,p_other_ped_qty_2_y,p_reconstruct_ramp_to_ada_stand_y,p_sidewlks_reconstruct_enhance_exist_y,p_sig_inter_enhance_exist_crosswlk_y,p_sig_inter_new_crosswlk_y,p_sig_inter_ped_heads_y,p_sig_inter_shorten_cross_y,p_sig_inter_timing_improv_y,p_amenities_trash_can_y,p_amenities_shade_tree_y,p_amenities_shade_tree_type_y,p_un_sig_inter_new_traff_sig_y,p_un_sig_inter_new_roundabout_y,p_un_sig_inter_new_rrfb_sig_y,p_un_sig_inter_shorten_cross_y,p_un_sig_inter_cross_surface_improv_y,p_sidewlks_widen_existing_y,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,v_other_traffic_calming_imprv_1_y,v_speed_feedback_signs_y,v_other_traffic_calming_qty_1_y,v_other_traffic_calming_imprv_2_y,v_other_traffic_calming_qty_2_y,v_remove_right_turn_pocket_y,v_remove_travel_ln_y,v_sig_inter_new_roundabout_y,v_sig_inter_timing_improv_y,v_un_sig_inter_new_traf_sig_y,v_un_sig_inter_new_roundabout_y,a4_reg_init_y,a4_reg_init_pct_y,a4_com_init_y,a4_com_init_pct_y,a4_safe_route_y,a4_safe_route_pct_y,a4_fl_mile_y,a4_fl_mile_pct_y,a4_emp_based_y,a4_emp_based_pct_y,a4_other_ni_y,a4_other_ni_descr_y,a4_other_ni_pct_y,a4_wb_audits_y,a4_bike_classes_y,a4_ped_classes_y,a4_demo_events_y,a4_com_enc_y,a4_le_methods_y,a4_com_meetings_y,a4_classrooms_y,a4_school_assem_y,a4_after_school_y,a4_bike_rodeos_y,a4_mock_cities_y,a4_walk_bus_y,a4_bike_train_y,a4_com_challenges_y,a4_srts_enc_y,a4_srts_le_y,a4_srts_training_y,a4_act_other_1_y,a4_act_other_1_descr_y,a4_act_other_2_y,a4_act_other_2_decr_y,a4_comm_trad_media_y,a4_comm_large_media_y,a4_comm_print_y,a4_comm_social_y,a4_comm_web_y,a4_comm_other_y,a4_comm_other_descr_y,a4_comm_language_y,a4_collab_pub_health_y,a4_collab_le_y,a4_collab_non_profit_y,a4_collab_schools_y,a4_collab_pub_works_y,a4_collab_other_y,a4_colab_other_descr_y,a4_plan_ped_y,a4_plan_bike_y,a4_plan_atp_y,a4_plan_school_routes_y,a4_row_open_street_demo_y,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno_y,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,a1_imp_agcy_name_y,a2_info_proj_name_y,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_,data_origin_y,full_merge,compare_desc
882,,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,"7-Maywood, City of-1",5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,ATP5-07-021S,,Plan,LA,5265,4319 Slauson Ave,City of Maywood,90270.0,Vasquez,No,,,No,,,63,0.0,6.0,3.0,40,40.0,,,33,33.0,,,The City of Maywood will develop an Active Tra...,City of Maywood,Project is located within one of the ten large...,SCAG,No,0.0,33.99,-118.19,The City of Maywood will develop an Active Tra...,,,,No,,No,,No,,No,Yes,50.0,8.0,Yes,50.0,Yes,0.0,,,No,,Yes,No,,The outcome will include the delivery of activ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,0.0,N,0.0,N,0.0,N,0.0,,0.0,N,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,Y,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-7-021S,5860,,Standard,Plan,,263000.0,"Maywood, City of",City of Maywood Active Transportation Plan,,,,,,,,,263000.0,21/22,263000.0,,,,,263000.0,263000.0,21/22,,,,,,0.0,22/23,,,,,,0.0,23/24,,,,,,0.0,24/25,263000.0,YES,YES,0.0,0.0,0.0,0.0,263.0,263.0,263000.0,,,,,21/22,Funded,right_only,False
883,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,"4-Oakland, City of-2",5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,ATP5-04-033S,,Infrastructure - Large,ALA,5012,250 Frank H Ogawa Plaza,Oakland,94612.0,Funding Program Manager,Yes,00099S,04-5012R,No,,,18,18.0,,,13,13.0,,,9,9.0,,,Neighborhood bike routes on four corridors in ...,"Neighborhood bike routes on 81st Avenue, 85th ...",Project is located within one of the ten large...,MTC,No,0.0,37.75,-122.18,East Oakland's street network currently presen...,,,,No,,Yes,,Yes,,No,Yes,90.0,0.0,Yes,10.0,No,0.0,,,Yes,,No,No,,Construction of four Class III bicycle bouleva...,,,225.0,30780.0,,,,,,,,,,Two Stage Left Turn Box,4.0,Green Backed Sharrow,30.0,,5.0,,,,,,,,,,,,,,,,,,,269.0,,,,Pedestrian Hybrid Beacon,2.0,,,37.0,,49.0,157.0,4.0,4.0,,,,,,6.0,2.0,18.0,,,,,,Speed bumps,,81.0,,,,,,,,,N,0.0,N,0.0,N,0.0,N,0.0,,0.0,N,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-4-033S,2346,,Standard,Infrastructure,Large,21859000.0,"Oakland, City of",East Oakland Neighborhood Bike Routes,,,,,,,17269000.0,23/24,,,17269000.0,,,,,,0.0,21/22,,,,,,0.0,22/23,,,,17269000.0,,17269000.0,23/24,,,,,,0.0,24/25,17269000.0,YES,YES,0.0,0.0,0.0,17269.0,0.0,17269.0,17269000.0,,,,23/24,,Funded,right_only,False


In [54]:
dfall.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 884 entries, 0 to 883
Columns: 481 entries, a1_imp_agcy_city_x to compare_desc
dtypes: Int64(9), bool(1), category(1), datetime64[ns](2), float64(265), geometry(1), object(202)
memory usage: 3.2+ MB


In [55]:
# columns in common with the dfs we merged
list(master_data.columns.intersection(funded.columns))

['a1_imp_agcy_city',
 'a1_imp_agcy_fed_ma_num',
 'a1_imp_agcy_ma',
 'a1_imp_agcy_name',
 'a1_imp_agcy_state_ma_num',
 'a1_imp_agcy_street',
 'a1_imp_agcy_title',
 'a1_imp_agcy_zip',
 'a1_proj_partner_agcy',
 'a1_proj_partner_exists',
 'a1_proj_partner_title',
 'a2_assem_dist_a',
 'a2_assem_dist_b',
 'a2_assem_dist_c',
 'a2_congress_dist_a',
 'a2_congress_dist_b',
 'a2_congress_dist_c',
 'a2_county',
 'a2_ct_dist',
 'a2_info_proj_descr',
 'a2_info_proj_loc',
 'a2_info_proj_name',
 'a2_mop_uza_population',
 'a2_mpo',
 'a2_past_proj',
 'a2_past_proj_qty',
 'a2_proj_lat',
 'a2_proj_long',
 'a2_proj_scope_summary',
 'a2_project_location_map',
 'a2_rtpa',
 'a2_senate_dist_a',
 'a2_senate_dist_b',
 'a2_senatedistc',
 'a3_plan_active_trans',
 'a3_plan_active_trans_exists',
 'a3_plan_bicycle',
 'a3_plan_bicycle_exists',
 'a3_plan_ped',
 'a3_plan_ped_exists',
 'a3_plan_srts',
 'a3_plan_srts_exists',
 'a3_st_bicycle_applies',
 'a3_st_bicycle_pct',
 'a3_st_num_schools',
 'a3_st_ped_applies',
 'a3_

In [56]:
# (dfall.iloc[:,199:230]).info()

In [57]:
dfall.sample()

Unnamed: 0,a1_imp_agcy_city_x,a1_imp_agcy_fed_ma_num_x,a1_imp_agcy_ma_x,a1_imp_agcy_name_x,a1_imp_agcy_state_ma_num_x,a1_imp_agcy_street_x,a1_imp_agcy_title_x,a1_imp_agcy_zip_x,a1_letter_of_intent,a1_proj_partner_agcy_x,a1_proj_partner_exists_x,a1_proj_partner_title_x,a2_assem_dist_a_x,a2_assem_dist_b_x,a2_assem_dist_c_x,a2_congress_dist_a_x,a2_congress_dist_b_x,a2_congress_dist_c_x,a2_county_x,a2_ct_dist,a2_info_proj_descr_x,a2_info_proj_loc_x,a2_info_proj_name_x,a2_mop_uza_population_x,a2_mpo_x,a2_past_proj_x,a2_past_proj_qty_x,a2_proj_lat_x,a2_proj_long_x,a2_proj_scope_summary_x,a2_project_location_map_x,a2_rtpa_x,a2_senate_dist_a_x,a2_senate_dist_b_x,a2_senatedistc_x,a3_plan_active_trans_x,a3_plan_active_trans_exists_x,a3_plan_bicycle_x,a3_plan_bicycle_exists_x,a3_plan_ped_x,a3_plan_ped_exists_x,a3_plan_srts_x,a3_plan_srts_exists_x,a3_proj_type,a3_st_bicycle_applies_x,a3_st_bicycle_pct_x,a3_st_num_schools_x,a3_st_ped_applies_x,a3_st_ped_pct_x,a3_st_srts_x,a3_trail_elig_cost_x,a3_trail_fed_funding_x,a3_trail_trans_pct_x,a3_trails_x,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode_x,a3_plan_none_x,a3_plan_other_x,a3_plan_other_desc_x,a2_output_outcome_x,a3_current_plan_x,b_sig_inter_new_bike_boxes_x,b_class_1_x,b_class_2_x,b_class_3_x,b_class_4_x,a4_bike_gap_pct_x,b_light_intersection_x,b_mid_block_new_rrfb_signal_x,b_mid_block_surf_improv_x,b_bsp_new_bikes_x,b_bike_new_secured_lockers_x,b_bike_new_racks_x,b_bsp_new_station_x,b_other_bike_improv_1_x,b_other_bike_improv_qty_1_x,b_other_bike_improv_2_x,b_other_bike_improv_qty_2_x,b_light_rdwy_seg_x,b_sig_inter_timing_improv_x,b_un_sig_new_rrfb_signal_x,b_un_sig_cross_surf_improv_x,a4_easement_support_x,m_cls_1_trails_widen_recon_exist_x,m_cls_1_trails_new__less_than_9_x,m_cls_1_trails_new_over_9_x,m_non_cls_trails_new_x,m_other_trail_imprv_1_x,m_other_trail_improv_qty_1_x,m_other_trail_imprv_2_x,m_other_trail_improv_qty_2_x,m_non_cls_widen_recon_exist_x,p_amenities_bench_x,a4_ped_gap_pct_x,p_mid_block_cross_new_rrfb_signal_x,p_light_intersection_x,p_lighting_rdwy_seg_x,p_mid_block_cross_surf_improv_x,p_new_ada_ramp_x,p_sidewlks_new_barrier_protect_x,p_sidewlks_new_4_to_8_x,p_sidewlks_new_over_8_x,p_other_ped_imprv_1_x,p_other_ped_qty_1_x,p_other_ped_imprv_2_x,p_other_ped_qty_2_x,p_reconstruct_ramp_to_ada_stand_x,p_sidewlks_reconstruct_enhance_exist_x,p_sig_inter_enhance_exist_crosswlk_x,p_sig_inter_new_crosswlk_x,p_sig_inter_ped_heads_x,p_sig_inter_shorten_cross_x,p_sig_inter_timing_improv_x,p_amenities_trash_can_x,p_amenities_shade_tree_x,p_amenities_shade_tree_type_x,p_un_sig_inter_new_traff_sig_x,p_un_sig_inter_new_roundabout_x,p_un_sig_inter_new_rrfb_sig_x,p_un_sig_inter_shorten_cross_x,p_un_sig_inter_cross_surface_improv_x,p_sidewlks_widen_existing_x,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1_x,v_speed_feedback_signs_x,v_other_traffic_calming_qty_1_x,v_other_traffic_calming_imprv_2_x,v_other_traffic_calming_qty_2_x,v_remove_right_turn_pocket_x,v_remove_travel_ln_x,v_sig_inter_new_roundabout_x,v_sig_inter_timing_improv_x,v_un_sig_inter_new_traf_sig_x,v_un_sig_inter_new_roundabout_x,app_fk,details_datetime_stamp,a4_reg_init_x,a4_reg_init_pct_x,a4_com_init_x,a4_com_init_pct_x,a4_safe_route_x,a4_safe_route_pct_x,a4_fl_mile_x,a4_fl_mile_pct_x,a4_emp_based_x,a4_emp_based_pct_x,a4_other_ni_x,a4_other_ni_descr_x,a4_other_ni_pct_x,a4_wb_audits_x,a4_bike_classes_x,a4_ped_classes_x,a4_demo_events_x,a4_com_enc_x,a4_le_methods_x,a4_com_meetings_x,a4_classrooms_x,a4_school_assem_x,a4_after_school_x,a4_bike_rodeos_x,a4_mock_cities_x,a4_walk_bus_x,a4_bike_train_x,a4_com_challenges_x,a4_srts_enc_x,a4_srts_le_x,a4_srts_training_x,a4_act_other_1_x,a4_act_other_1_descr_x,a4_act_other_2_x,a4_act_other_2_decr_x,a4_comm_trad_media_x,a4_comm_large_media_x,a4_comm_print_x,a4_comm_social_x,a4_comm_web_x,a4_comm_other_x,a4_comm_other_descr_x,a4_comm_language_x,a4_collab_pub_health_x,a4_collab_le_x,a4_collab_non_profit_x,a4_collab_schools_x,a4_collab_pub_works_x,a4_collab_other_x,a4_colab_other_descr_x,a4_plan_ped_x,a4_plan_bike_x,a4_plan_atp_x,a4_plan_school_routes_x,a4_row_open_street_demo_x,assembly_district_x,congressional_district_x,senate_district_x,#_x,atp_id,awarded_x,ppno_x,ppno_1_x,data_origin_x,geometry,awarded_y,#_y,atp_id_x,ppno_1_y,a3_proj_type_x,a2_county_y,a1_locode_y,a1_imp_agcy_street_y,a1_imp_agcy_city_y,a1_imp_agcy_zip_y,a1_imp_agcy_title_y,a1_imp_agcy_ma_y,a1_imp_agcy_state_ma_num_y,a1_imp_agcy_fed_ma_num_y,a1_proj_partner_exists_y,a1_proj_partner_agcy_y,a1_proj_partner_title_y,assembly_district_y,a2_assem_dist_a_y,a2_assem_dist_b_y,a2_assem_dist_c_y,congressional_district_y,a2_congress_dist_a_y,a2_congress_dist_b_y,a2_congress_dist_c_y,senate_district_y,a2_senate_dist_a_y,a2_senate_dist_b_y,a2_senatedistc_y,a2_info_proj_descr_y,a2_info_proj_loc_y,a2_mop_uza_population_y,a2_mpo_y,a2_past_proj_y,a2_past_proj_qty_y,a2_proj_lat_y,a2_proj_long_y,a2_proj_scope_summary_y,a2_project_location_map_y,a2_rtpa_y,a3_plan_active_trans_y,a3_plan_active_trans_exists_y,a3_plan_bicycle_y,a3_plan_bicycle_exists_y,a3_plan_ped_y,a3_plan_ped_exists_y,a3_plan_srts_y,a3_plan_srts_exists_y,a3_st_bicycle_applies_y,a3_st_bicycle_pct_y,a3_st_num_schools_y,a3_st_ped_applies_y,a3_st_ped_pct_y,a3_st_srts_y,a3_trail_elig_cost_y,a3_trail_fed_funding_y,a3_trail_trans_pct_y,a3_current_plan_y,a3_trails_y,a3_plan_none_y,a3_plan_other_y,a3_plan_other_desc_y,a2_output_outcome_y,b_sig_inter_new_bike_boxes_y,b_class_1_y,b_class_2_y,b_class_3_y,b_class_4_y,a4_bike_gap_pct_y,b_light_intersection_y,b_mid_block_new_rrfb_signal_y,b_mid_block_surf_improv_y,b_bsp_new_bikes_y,b_bike_new_secured_lockers_y,b_bike_new_racks_y,b_bsp_new_station_y,b_other_bike_improv_1_y,b_other_bike_improv_qty_1_y,b_other_bike_improv_2_y,b_other_bike_improv_qty_2_y,b_light_rdwy_seg_y,b_sig_inter_timing_improv_y,b_un_sig_new_rrfb_signal_y,b_un_sig_cross_surf_improv_y,a4_easement_support_y,m_cls_1_trails_widen_recon_exist_y,m_cls_1_trails_new__less_than_9_y,m_cls_1_trails_new_over_9_y,m_non_cls_trails_new_y,m_other_trail_imprv_1_y,m_other_trail_improv_qty_1_y,m_other_trail_imprv_2_y,m_other_trail_improv_qty_2_y,m_non_cls_widen_recon_exist_y,p_amenities_bench_y,a4_ped_gap_pct_y,p_mid_block_cross_new_rrfb_signal_y,p_light_intersection_y,p_lighting_rdwy_seg_y,p_mid_block_cross_surf_improv_y,p_new_ada_ramp_y,p_sidewlks_new_barrier_protect_y,p_sidewlks_new_4_to_8_y,p_sidewlks_new_over_8_y,p_other_ped_imprv_1_y,p_other_ped_qty_1_y,p_other_ped_imprv_2_y,p_other_ped_qty_2_y,p_reconstruct_ramp_to_ada_stand_y,p_sidewlks_reconstruct_enhance_exist_y,p_sig_inter_enhance_exist_crosswlk_y,p_sig_inter_new_crosswlk_y,p_sig_inter_ped_heads_y,p_sig_inter_shorten_cross_y,p_sig_inter_timing_improv_y,p_amenities_trash_can_y,p_amenities_shade_tree_y,p_amenities_shade_tree_type_y,p_un_sig_inter_new_traff_sig_y,p_un_sig_inter_new_roundabout_y,p_un_sig_inter_new_rrfb_sig_y,p_un_sig_inter_shorten_cross_y,p_un_sig_inter_cross_surface_improv_y,p_sidewlks_widen_existing_y,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,v_other_traffic_calming_imprv_1_y,v_speed_feedback_signs_y,v_other_traffic_calming_qty_1_y,v_other_traffic_calming_imprv_2_y,v_other_traffic_calming_qty_2_y,v_remove_right_turn_pocket_y,v_remove_travel_ln_y,v_sig_inter_new_roundabout_y,v_sig_inter_timing_improv_y,v_un_sig_inter_new_traf_sig_y,v_un_sig_inter_new_roundabout_y,a4_reg_init_y,a4_reg_init_pct_y,a4_com_init_y,a4_com_init_pct_y,a4_safe_route_y,a4_safe_route_pct_y,a4_fl_mile_y,a4_fl_mile_pct_y,a4_emp_based_y,a4_emp_based_pct_y,a4_other_ni_y,a4_other_ni_descr_y,a4_other_ni_pct_y,a4_wb_audits_y,a4_bike_classes_y,a4_ped_classes_y,a4_demo_events_y,a4_com_enc_y,a4_le_methods_y,a4_com_meetings_y,a4_classrooms_y,a4_school_assem_y,a4_after_school_y,a4_bike_rodeos_y,a4_mock_cities_y,a4_walk_bus_y,a4_bike_train_y,a4_com_challenges_y,a4_srts_enc_y,a4_srts_le_y,a4_srts_training_y,a4_act_other_1_y,a4_act_other_1_descr_y,a4_act_other_2_y,a4_act_other_2_decr_y,a4_comm_trad_media_y,a4_comm_large_media_y,a4_comm_print_y,a4_comm_social_y,a4_comm_web_y,a4_comm_other_y,a4_comm_other_descr_y,a4_comm_language_y,a4_collab_pub_health_y,a4_collab_le_y,a4_collab_non_profit_y,a4_collab_schools_y,a4_collab_pub_works_y,a4_collab_other_y,a4_colab_other_descr_y,a4_plan_ped_y,a4_plan_bike_y,a4_plan_atp_y,a4_plan_school_routes_y,a4_row_open_street_demo_y,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno_y,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,a1_imp_agcy_name_y,a2_info_proj_name_y,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_,data_origin_y,full_merge,compare_desc
533,Orinda,04-5444F15,Yes,"Orinda, City of",04-5444F15,22 Orinda Way,Public Works Director,94563.0,,,No,,16,,,11,,,Contra Costa,4.0,"Remove 2” HMA, Install 6” Asphalt Pavement, In...",The Project is located at the three-way Inters...,Safe Routes to School - Glorietta Elementary S...,Project is located within one of the ten large...,MTC,Yes,1.0,,,The Safe Routes to School – Glorietta Elementa...,,,7,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,49.0,1.0,Yes,51.0,Yes,,,0.0,No,1.0,3218.0,Supplemental_Glorietta Elementary Parents' Clu...,Orinda_ATP_Attach-A-Signature_G (002) -signed.pdf,Orinda_ATP_Attach-E_Photos_G.docx,Orinda_ATP_Attach-D_Layout Plans.pdf,Orinda_ATP_Attach-B-Engr-Checklist_G.PDF,,Orinda_ATP_Letters of Support.pdf,,,Orinda_ATP_Attach-F-Project Estimate.xlsx,,2022-06-09 11:08:55,"4-Orinda, City of-1",6.0,5444,0,0,,"Remove 3,950 SF 2” HMA, Install 3,950 SF 6” As...",Yes,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,100.0,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,2.0,,Yes,No,No,,,,,,,,,,,,3218.0,2022-06-09 11:08:55,N,0.0,N,0.0,N,0.0,N,0.0,,,N,,0.0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,16,11,7,,,N,,,Application,POINT EMPTY,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False


In [58]:
dfall>>select(_.awarded_x, _.awarded_y,
               # _['#_x'], _['#_y'],
               # _.ppno_x, _.ppno_y,
               # _.ppno_1_x, _.ppno_1_y,
               _.a2_info_proj_descr_x, _.a2_info_proj_descr_y)>>filter(_.a2_info_proj_descr_y.notnull())

Unnamed: 0,awarded_x,awarded_y,a2_info_proj_descr_x,a2_info_proj_descr_y
8,N,Y,Design and construct buffered bike lanes on De...,Design and construct buffered bike lanes on De...
33,N,Y,This project focuses on school and pedestrian ...,This project focuses on school and pedestrian ...
35,N,Y,Construct Class IV separated bikeways with Cla...,Construct Class IV separated bikeways with Cla...
60,N,Y,Construction of .8 miles of Segment 7 of the R...,Construction of .8 miles of Segment 7 of the R...
66,N,Y,The Laurel Elementary SRTS includes infrastruc...,The Laurel Elementary SRTS includes infrastruc...
77,N,Y,Amador & Trinity and Church & Waldby intersect...,Amador & Trinity and Church & Waldby intersect...
81,N,Y,"Construct new curb, gutter, sidewalks, ADA ram...","Construct new curb, gutter, sidewalks, ADA ram..."
83,N,Y,"Completion of PS&E, ROW acquisition, and const...","Completion of PS&E, ROW acquisition, and const..."
91,N,Y,"In Happy Camp on SR 96, install sidewalks, con...","In Happy Camp on SR 96, install sidewalks, con..."
96,N,Y,"Construct 68 curb ramps, 87 crosswalks, advanc...","Construct 68 curb ramps, 87 crosswalks, advanc..."


In [59]:
compare_desc = np.where(dfall["a2_info_proj_descr_x"] == dfall["a2_info_proj_descr_y"], True, False)
dfall["compare_proj_desc"] = compare_desc

In [60]:
dfall.compare_proj_desc.value_counts()

False    837
True      47
Name: compare_proj_desc, dtype: int64

In [61]:
#check that there are no mismatched entries
dfall>>filter(_.compare_proj_desc==False)>>select(_.a2_info_proj_descr_x,  _.a2_info_proj_descr_y)>>arrange(_.a2_info_proj_descr_y)

Unnamed: 0,a2_info_proj_descr_x,a2_info_proj_descr_y
883,,Neighborhood bike routes on four corridors in ...
882,,The City of Maywood will develop an Active Tra...
0,"PA&ED, PS&E, and CON funding for construction ...",
1,Bishop Street Class 3 Bicycle Boulevard with T...,
2,CON funding for installing bicycling facilitie...,
...,...,...
877,"Construction funding for Class IV bikelanes, ...",
878,Construct Class II bike lane segments; install...,
879,This project will decouple 2nd and 3rd street ...,
880,"Curb extensions, sidewalks and crosswalks for ...",


### test using concat

In [62]:
master_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 882 entries, 0 to 881
Columns: 218 entries, a1_imp_agcy_city to geometry
dtypes: Int64(9), datetime64[ns](2), float64(96), geometry(1), int64(16), object(94)
memory usage: 1.5+ MB


In [63]:
funded.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 49 entries, 0 to 48
Columns: 264 entries, awarded to data_origin
dtypes: float64(155), object(109)
memory usage: 101.4+ KB


In [64]:
dfall2 = (pd.concat([master_data, funded]))

In [65]:
dfall2.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 931 entries, 0 to 48
Columns: 287 entries, a1_imp_agcy_city to year__con_ni_
dtypes: Float64(9), datetime64[ns](2), float64(150), geometry(1), object(125)
memory usage: 2.1+ MB


In [66]:
dfall2.sample()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,assembly_district,congressional_district,senate_district,#,atp_id,awarded,ppno,ppno_1,data_origin,geometry,atp_id_x,a3_proj_type_x,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_
467,Madera,5941,Yes,Madera County,5941,200 W. 4th Street,Deputy Public Works Director,93637.0,,,No,,5.0,,,4.0,,,Madera,6.0,Install sidewalk and bike lanes along Road 41...,"The location of this project is on Road 417, g...",Road 417 Pedestrian Facilities Project,Project is located outside one of the large MP...,MCTC,No,0.0,,,This project will construct pedestrian and bic...,,,8.0,,,,Yes,,No,,No,,No,Infrastructure - Medium,Yes,50.0,0.0,Yes,50.0,No,,,0.0,No,22.0,3473.0,,Signed.pdf,Attachment-E-Photos.pdf,Attachment-D-Project-Layout Rev1.pdf,Attachment-B-Engr-Checklist Rev1.pdf,,Letters of Support.pdf,,,Attachment-F-Estimate Rev1.pdf,,2022-06-15 08:26:33,6-Madera County-22,6,5941,0,0,,"The construction of approximately 14,000 feet ...",No,,,13995.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,8.0,,13995.0,,,,,,,,,,,,,,,,,,,,,,No,Yes,Yes,,,,,,,,,,,,3473.0,2022-06-15 08:26:33,N,0.0,N,0.0,N,0.0,N,0.0,,,N,,0.0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,5,4,8,,,N,,,Application,POINT EMPTY,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [67]:
len(dfall2>>group_by(_.project_app_id, _.project_cycle)>>summarize(n = _.awarded.nunique())>>arrange(_.n)>>filter(_.n>1))

47

In [68]:
dfall2>>group_by(_.project_app_id, _.project_cycle)>>summarize(n = _.awarded.nunique())>>arrange(_.n)>>filter(_.n>1)

Unnamed: 0,project_app_id,project_cycle,n
2,"1-Arcata, City of-1",5,2
3,"1-Clearlake, City of-1",5,2
36,10-Mariposa County-1,5,2
37,10-Mariposa County-2,5,2
77,10-Tuolumne County-1,5,2
100,"11-Imperial Beach, City of-1",5,2
112,"11-National City, City of-3",5,2
114,"11-Oceanside, City of-1",5,2
118,11-San Diego Association of Governments (SANDA...,5,2
223,2-Karuk Tribe-1,5,2


In [69]:
dfall2.awarded.value_counts()

N    882
Y     49
Name: awarded, dtype: int64

In [70]:
len(dfall2)

931

In [71]:
#dfall2['awarded'] = dfall2['awarded'].astype('category') 
# define the valid categories: 
#dfall2['awarded'] = dfall2['awarded'].cat.set_categories(['Y', 'N'], ordered=True) 

In [72]:
dfall2.sort_values(['project_app_id','a2_proj_scope_summary', 'project_cycle', 'awarded'], inplace=True, ascending=True) 

In [105]:
(dfall2>>select(_.project_app_id, _.a2_proj_scope_summary, _.project_cycle, _.awarded, _.data_origin)).head(50)

Unnamed: 0,project_app_id,a2_proj_scope_summary,project_cycle,awarded,data_origin
180,03-El Dorado County-1,The Project consists of a Class I multi-use gr...,5.0,N,Application
165,03-El Dorado County-2,The overall Diamond Springs Parkway project co...,5.0,N,Application
18,"1-Arcata, City of-1",The Arcata Annie & Mary Trail Connectivity Pro...,5.0,Y,Funded
175,"1-Arcata, City of-1",The Arcata Annie & Mary Trail Connectivity Pro...,5.0,N,Application
21,"1-Clearlake, City of-1",The project will expand bicycle and pedestrian...,5.0,Y,Funded
194,"1-Clearlake, City of-1",The project will expand bicycle and pedestrian...,5.0,N,Application
831,"1-Eureka, City of-1",The Bay to Zoo Trail creates approximately two...,6.0,N,Application
299,"1-Eureka, City of-1",The Bay to Zoo Trail creates approximately two...,5.0,N,Application
102,"1-Eureka, City of-2",C Street (between Waterfront Drive and Harris ...,5.0,N,Application
841,"1-Eureka, City of-2",C Street (between Waterfront Drive and Harris ...,6.0,N,Application


In [74]:
dfall3 = dfall2.sort_values('awarded').drop_duplicates(subset=['project_app_id','a2_proj_scope_summary','project_cycle'], keep='first')

In [107]:
dfall3>>select(_.project_app_id, _.a2_proj_scope_summary, _.project_cycle, _.awarded, _.data_origin)>>arrange(_.project_app_id)

Unnamed: 0,project_app_id,a2_proj_scope_summary,project_cycle,awarded,data_origin
180,03-El Dorado County-1,The Project consists of a Class I multi-use gr...,5,N,Application
165,03-El Dorado County-2,The overall Diamond Springs Parkway project co...,5,N,Application
18,"1-Arcata, City of-1",The Arcata Annie & Mary Trail Connectivity Pro...,5.00,Y,Funded
21,"1-Clearlake, City of-1",The project will expand bicycle and pedestrian...,5.00,Y,Funded
831,"1-Eureka, City of-1",The Bay to Zoo Trail creates approximately two...,6,N,Application
...,...,...,...,...,...
73,9-California Department of Transportation-6,This project will construct an ADA accessible ...,5,N,Application
168,"9-Tehachapi, City of-1",Dennison Road is one of three major north-sout...,5,N,Application
448,"9-Tehachapi, City of-1",This 0.5 mile stretch of Valley Boulevard serv...,6,N,Application
509,"9-Tehachapi, City of-2",The residential neighborhood north of Union Pa...,6,N,Application


In [108]:
(dfall3>>arrange(_.project_app_id, _.project_cycle)>>select(_.project_app_id, _.project_cycle, _.awarded, _.data_origin)).head(50)

Unnamed: 0,project_app_id,project_cycle,awarded,data_origin
180,03-El Dorado County-1,5.0,N,Application
165,03-El Dorado County-2,5.0,N,Application
18,"1-Arcata, City of-1",5.0,Y,Funded
21,"1-Clearlake, City of-1",5.0,Y,Funded
299,"1-Eureka, City of-1",5.0,N,Application
831,"1-Eureka, City of-1",6.0,N,Application
102,"1-Eureka, City of-2",5.0,N,Application
841,"1-Eureka, City of-2",6.0,N,Application
774,"1-Fortuna, City of-1",6.0,N,Application
267,1-Humboldt County Association of Governments-1,5.0,N,Application


In [77]:
#should have 882 - same number as the application data...
len(dfall3)

884

In [78]:
## the two that might show up twice may be from the changes in application name 
## same that did not merge in the first attempt
#dfall>>filter(_.full_merge=='right_only')>>select(_.project_app_id, _.a2_proj_scope_summary_x, _.project_cycle, _.awarded_x)>>arrange(_.project_app_id)

In [79]:
#dfall>>filter(_.project_app_id=='4-Oakland, City of-2')>>select(_.project_app_id,_.a1_imp_agcy_name_x, _.a1_imp_agcy_name_y, _.project_cycle)

In [80]:
dfall3>>group_by(_.project_app_id, _.project_cycle)>>summarize(n = _.awarded.nunique())>>arrange(_.n)

Unnamed: 0,project_app_id,project_cycle,n
0,03-El Dorado County-1,5.00,1
1,03-El Dorado County-2,5.00,1
2,"1-Arcata, City of-1",5.00,1
3,"1-Clearlake, City of-1",5.00,1
4,"1-Eureka, City of-1",5.00,1
...,...,...,...
879,9-California Department of Transportation-6,5.00,1
880,"9-Tehachapi, City of-1",5.00,1
881,"9-Tehachapi, City of-1",6.00,1
882,"9-Tehachapi, City of-2",5.00,1


In [81]:
dfall3.sample()

Unnamed: 0,a1_imp_agcy_city,a1_imp_agcy_fed_ma_num,a1_imp_agcy_ma,a1_imp_agcy_name,a1_imp_agcy_state_ma_num,a1_imp_agcy_street,a1_imp_agcy_title,a1_imp_agcy_zip,a1_letter_of_intent,a1_proj_partner_agcy,a1_proj_partner_exists,a1_proj_partner_title,a2_assem_dist_a,a2_assem_dist_b,a2_assem_dist_c,a2_congress_dist_a,a2_congress_dist_b,a2_congress_dist_c,a2_county,a2_ct_dist,a2_info_proj_descr,a2_info_proj_loc,a2_info_proj_name,a2_mop_uza_population,a2_mpo,a2_past_proj,a2_past_proj_qty,a2_proj_lat,a2_proj_long,a2_proj_scope_summary,a2_project_location_map,a2_rtpa,a2_senate_dist_a,a2_senate_dist_b,a2_senatedistc,a3_plan_active_trans,a3_plan_active_trans_exists,a3_plan_bicycle,a3_plan_bicycle_exists,a3_plan_ped,a3_plan_ped_exists,a3_plan_srts,a3_plan_srts_exists,a3_proj_type,a3_st_bicycle_applies,a3_st_bicycle_pct,a3_st_num_schools,a3_st_ped_applies,a3_st_ped_pct,a3_st_srts,a3_trail_elig_cost,a3_trail_fed_funding,a3_trail_trans_pct,a3_trails,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode,a3_plan_none,a3_plan_other,a3_plan_other_desc,a2_output_outcome,a3_current_plan,b_sig_inter_new_bike_boxes,b_class_1,b_class_2,b_class_3,b_class_4,a4_bike_gap_pct,b_light_intersection,b_mid_block_new_rrfb_signal,b_mid_block_surf_improv,b_bsp_new_bikes,b_bike_new_secured_lockers,b_bike_new_racks,b_bsp_new_station,b_other_bike_improv_1,b_other_bike_improv_qty_1,b_other_bike_improv_2,b_other_bike_improv_qty_2,b_light_rdwy_seg,b_sig_inter_timing_improv,b_un_sig_new_rrfb_signal,b_un_sig_cross_surf_improv,a4_easement_support,m_cls_1_trails_widen_recon_exist,m_cls_1_trails_new__less_than_9,m_cls_1_trails_new_over_9,m_non_cls_trails_new,m_other_trail_imprv_1,m_other_trail_improv_qty_1,m_other_trail_imprv_2,m_other_trail_improv_qty_2,m_non_cls_widen_recon_exist,p_amenities_bench,a4_ped_gap_pct,p_mid_block_cross_new_rrfb_signal,p_light_intersection,p_lighting_rdwy_seg,p_mid_block_cross_surf_improv,p_new_ada_ramp,p_sidewlks_new_barrier_protect,p_sidewlks_new_4_to_8,p_sidewlks_new_over_8,p_other_ped_imprv_1,p_other_ped_qty_1,p_other_ped_imprv_2,p_other_ped_qty_2,p_reconstruct_ramp_to_ada_stand,p_sidewlks_reconstruct_enhance_exist,p_sig_inter_enhance_exist_crosswlk,p_sig_inter_new_crosswlk,p_sig_inter_ped_heads,p_sig_inter_shorten_cross,p_sig_inter_timing_improv,p_amenities_trash_can,p_amenities_shade_tree,p_amenities_shade_tree_type,p_un_sig_inter_new_traff_sig,p_un_sig_inter_new_roundabout,p_un_sig_inter_new_rrfb_sig,p_un_sig_inter_shorten_cross,p_un_sig_inter_cross_surface_improv,p_sidewlks_widen_existing,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1,v_speed_feedback_signs,v_other_traffic_calming_qty_1,v_other_traffic_calming_imprv_2,v_other_traffic_calming_qty_2,v_remove_right_turn_pocket,v_remove_travel_ln,v_sig_inter_new_roundabout,v_sig_inter_timing_improv,v_un_sig_inter_new_traf_sig,v_un_sig_inter_new_roundabout,app_fk,details_datetime_stamp,a4_reg_init,a4_reg_init_pct,a4_com_init,a4_com_init_pct,a4_safe_route,a4_safe_route_pct,a4_fl_mile,a4_fl_mile_pct,a4_emp_based,a4_emp_based_pct,a4_other_ni,a4_other_ni_descr,a4_other_ni_pct,a4_wb_audits,a4_bike_classes,a4_ped_classes,a4_demo_events,a4_com_enc,a4_le_methods,a4_com_meetings,a4_classrooms,a4_school_assem,a4_after_school,a4_bike_rodeos,a4_mock_cities,a4_walk_bus,a4_bike_train,a4_com_challenges,a4_srts_enc,a4_srts_le,a4_srts_training,a4_act_other_1,a4_act_other_1_descr,a4_act_other_2,a4_act_other_2_decr,a4_comm_trad_media,a4_comm_large_media,a4_comm_print,a4_comm_social,a4_comm_web,a4_comm_other,a4_comm_other_descr,a4_comm_language,a4_collab_pub_health,a4_collab_le,a4_collab_non_profit,a4_collab_schools,a4_collab_pub_works,a4_collab_other,a4_colab_other_descr,a4_plan_ped,a4_plan_bike,a4_plan_atp,a4_plan_school_routes,a4_row_open_street_demo,assembly_district,congressional_district,senate_district,#,atp_id,awarded,ppno,ppno_1,data_origin,geometry,atp_id_x,a3_proj_type_x,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_
10,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702.0,,,No,,69.0,,,46.0,,,Orange,12.0,Pedestrian traffic safety improvements for San...,"In the City of Santa Ana, the safe routes to s...","Santa Ana High School, Heninger Elementary and...",Project is located within one of the ten large...,SCAG,Yes,2.0,33.74,117.88,"This project will be repairing, replacing and ...",,,34.0,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,No,,3.0,Yes,100.0,Yes,,,0.0,No,7.0,1807.0,Attachment K.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos.pdf,Attachment D - Plans or MapR.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F_ Cost Estimate.pdf,,2020-08-21 09:05:27,"12-Santa Ana, City of-7",5,5063,No,No,,"Construct curb extensions at 11 intersections,...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,7.0,,,,,,,,65.0,1000.0,7.0,,,,,,,,,,3.0,10.0,,,Yes,No,No,,,,,,,,,,,,1807.0,2020-08-21 09:05:27,N,0.0,N,0.0,N,0.0,N,0.0,,0.0,N,,0.0,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.87628 33.74363),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [82]:
# #unnamed cols originating in Funded data -- UPDATE NOW FIXED MANUALLY
# unnamed_cols = [col for col in funded.columns if isinstance(col, str) and re.match('unnamed.*', col)]
# unnamed_cols

## function for combining data - works

In [83]:
def join_funding_and_app_data(df_funding,
                              df_app,
                              awarded_col: list = [],
                             sort_values_cols: list = [],
                             subset_cols: list = []
                             ):
    '''
    columns in the funded and application data that we want to use
    awarded_col= ['awarded'],
    sort_values_cols = ['project_app_id','a2_proj_scope_summary', 'project_cycle', 'awarded'],
    subset_cols = ['project_app_id','a2_proj_scope_summary','project_cycle']
    '''
    # concat the funding and app dataframes
    df = (pd.concat([df_app, df_funding]))
    
    # take the awarded column and convert to a category so we can order by this column
    df[awarded_col] = df[awarded_col].astype('category') 
    df[awarded_col] = df[awarded_col].cat.set_categories(['Y', 'N'], ordered=True) 
    
    # sort values based on columns we defined (usually key like unique id, cycle)
    #df = df.sort_values(sort_values_cols, inplace=True, ascending=True) 
    
    # drop duplicates so we only get the funded data instead of the application data for a project that is selected
    df_final = df.sort_values(awarded_col).drop_duplicates(subset=subset_cols, keep='first')
    
    return df_final

In [84]:
df_test = join_funding_and_app_data(funded,
                              master_data,
                              awarded_col= 'awarded',
                             sort_values_cols= ['project_app_id','a2_proj_scope_summary', 'project_cycle', 'awarded'],
                             subset_cols = ['project_app_id','a2_proj_scope_summary','project_cycle']
                             )

In [85]:
df_test.sort_values(['project_app_id','a2_proj_scope_summary', 'project_cycle', 'awarded'], inplace=True, ascending=True) 

In [109]:
(df_test>>select(_.project_app_id, _.a2_proj_scope_summary, _.project_cycle, _.awarded, _.data_origin)).head(50)

Unnamed: 0,project_app_id,a2_proj_scope_summary,project_cycle,awarded,data_origin
180,03-El Dorado County-1,The Project consists of a Class I multi-use gr...,5.0,N,Application
165,03-El Dorado County-2,The overall Diamond Springs Parkway project co...,5.0,N,Application
18,"1-Arcata, City of-1",The Arcata Annie & Mary Trail Connectivity Pro...,5.0,Y,Funded
21,"1-Clearlake, City of-1",The project will expand bicycle and pedestrian...,5.0,Y,Funded
831,"1-Eureka, City of-1",The Bay to Zoo Trail creates approximately two...,6.0,N,Application
299,"1-Eureka, City of-1",The Bay to Zoo Trail creates approximately two...,5.0,N,Application
102,"1-Eureka, City of-2",C Street (between Waterfront Drive and Harris ...,5.0,N,Application
841,"1-Eureka, City of-2",C Street (between Waterfront Drive and Harris ...,6.0,N,Application
774,"1-Fortuna, City of-1",Pedestrian and bicycle facilities are intermit...,6.0,N,Application
267,1-Humboldt County Association of Governments-1,The purpose of HCAOG's Humboldt Regional Activ...,5.0,N,Application


In [111]:
df_test>>group_by(_.project_cycle)>>count(_.data_origin)

Unnamed: 0,project_cycle,data_origin,n
0,5,Application,401
1,5,Funded,49
2,6,Application,434


#### Check what columns are the duplicates

In [90]:
#https://stackoverflow.com/questions/61793094/find-column-whose-name-contains-a-specific-value-that-is-in-a-fixed-column

In [91]:
import re                                                               

In [92]:
dcolumns = [col for col in dfall.columns if isinstance(col, str) and re.match('.*_x', col)]

In [93]:
len(dcolumns)

196

In [94]:
dcolumns

['a1_imp_agcy_city_x',
 'a1_imp_agcy_fed_ma_num_x',
 'a1_imp_agcy_ma_x',
 'a1_imp_agcy_name_x',
 'a1_imp_agcy_state_ma_num_x',
 'a1_imp_agcy_street_x',
 'a1_imp_agcy_title_x',
 'a1_imp_agcy_zip_x',
 'a1_proj_partner_agcy_x',
 'a1_proj_partner_exists_x',
 'a1_proj_partner_title_x',
 'a2_assem_dist_a_x',
 'a2_assem_dist_b_x',
 'a2_assem_dist_c_x',
 'a2_congress_dist_a_x',
 'a2_congress_dist_b_x',
 'a2_congress_dist_c_x',
 'a2_county_x',
 'a2_info_proj_descr_x',
 'a2_info_proj_loc_x',
 'a2_info_proj_name_x',
 'a2_mop_uza_population_x',
 'a2_mpo_x',
 'a2_past_proj_x',
 'a2_past_proj_qty_x',
 'a2_proj_lat_x',
 'a2_proj_long_x',
 'a2_proj_scope_summary_x',
 'a2_project_location_map_x',
 'a2_rtpa_x',
 'a2_senate_dist_a_x',
 'a2_senate_dist_b_x',
 'a2_senatedistc_x',
 'a3_plan_active_trans_x',
 'a3_plan_active_trans_exists_x',
 'a3_plan_bicycle_x',
 'a3_plan_bicycle_exists_x',
 'a3_plan_ped_x',
 'a3_plan_ped_exists_x',
 'a3_plan_srts_x',
 'a3_plan_srts_exists_x',
 'a3_st_bicycle_applies_x',
 '

In [95]:
def remove_duplicate_cols(df, col_list):
    for col_x in col_list:
        df[col_x] = df[col_x].fillna(df[col_y])
    

In [96]:
dfall

Unnamed: 0,a1_imp_agcy_city_x,a1_imp_agcy_fed_ma_num_x,a1_imp_agcy_ma_x,a1_imp_agcy_name_x,a1_imp_agcy_state_ma_num_x,a1_imp_agcy_street_x,a1_imp_agcy_title_x,a1_imp_agcy_zip_x,a1_letter_of_intent,a1_proj_partner_agcy_x,a1_proj_partner_exists_x,a1_proj_partner_title_x,a2_assem_dist_a_x,a2_assem_dist_b_x,a2_assem_dist_c_x,a2_congress_dist_a_x,a2_congress_dist_b_x,a2_congress_dist_c_x,a2_county_x,a2_ct_dist,a2_info_proj_descr_x,a2_info_proj_loc_x,a2_info_proj_name_x,a2_mop_uza_population_x,a2_mpo_x,a2_past_proj_x,a2_past_proj_qty_x,a2_proj_lat_x,a2_proj_long_x,a2_proj_scope_summary_x,a2_project_location_map_x,a2_rtpa_x,a2_senate_dist_a_x,a2_senate_dist_b_x,a2_senatedistc_x,a3_plan_active_trans_x,a3_plan_active_trans_exists_x,a3_plan_bicycle_x,a3_plan_bicycle_exists_x,a3_plan_ped_x,a3_plan_ped_exists_x,a3_plan_srts_x,a3_plan_srts_exists_x,a3_proj_type,a3_st_bicycle_applies_x,a3_st_bicycle_pct_x,a3_st_num_schools_x,a3_st_ped_applies_x,a3_st_ped_pct_x,a3_st_srts_x,a3_trail_elig_cost_x,a3_trail_fed_funding_x,a3_trail_trans_pct_x,a3_trails_x,agency_app_num,app_pk,attch_addtl_attachments,attch_app_sig_page,attch_conditions_photos,attch_conditions_project_map,attch_engineeers_checklist,attch_exhibit22_plan,attch_letters_of_support,attch_link,attch_ni_workplan,attch_project_estimate,completed_pdf_form,main_datetime_stamp,project_app_id,project_cycle,a1_locode_x,a3_plan_none_x,a3_plan_other_x,a3_plan_other_desc_x,a2_output_outcome_x,a3_current_plan_x,b_sig_inter_new_bike_boxes_x,b_class_1_x,b_class_2_x,b_class_3_x,b_class_4_x,a4_bike_gap_pct_x,b_light_intersection_x,b_mid_block_new_rrfb_signal_x,b_mid_block_surf_improv_x,b_bsp_new_bikes_x,b_bike_new_secured_lockers_x,b_bike_new_racks_x,b_bsp_new_station_x,b_other_bike_improv_1_x,b_other_bike_improv_qty_1_x,b_other_bike_improv_2_x,b_other_bike_improv_qty_2_x,b_light_rdwy_seg_x,b_sig_inter_timing_improv_x,b_un_sig_new_rrfb_signal_x,b_un_sig_cross_surf_improv_x,a4_easement_support_x,m_cls_1_trails_widen_recon_exist_x,m_cls_1_trails_new__less_than_9_x,m_cls_1_trails_new_over_9_x,m_non_cls_trails_new_x,m_other_trail_imprv_1_x,m_other_trail_improv_qty_1_x,m_other_trail_imprv_2_x,m_other_trail_improv_qty_2_x,m_non_cls_widen_recon_exist_x,p_amenities_bench_x,a4_ped_gap_pct_x,p_mid_block_cross_new_rrfb_signal_x,p_light_intersection_x,p_lighting_rdwy_seg_x,p_mid_block_cross_surf_improv_x,p_new_ada_ramp_x,p_sidewlks_new_barrier_protect_x,p_sidewlks_new_4_to_8_x,p_sidewlks_new_over_8_x,p_other_ped_imprv_1_x,p_other_ped_qty_1_x,p_other_ped_imprv_2_x,p_other_ped_qty_2_x,p_reconstruct_ramp_to_ada_stand_x,p_sidewlks_reconstruct_enhance_exist_x,p_sig_inter_enhance_exist_crosswlk_x,p_sig_inter_new_crosswlk_x,p_sig_inter_ped_heads_x,p_sig_inter_shorten_cross_x,p_sig_inter_timing_improv_x,p_amenities_trash_can_x,p_amenities_shade_tree_x,p_amenities_shade_tree_type_x,p_un_sig_inter_new_traff_sig_x,p_un_sig_inter_new_roundabout_x,p_un_sig_inter_new_rrfb_sig_x,p_un_sig_inter_shorten_cross_x,p_un_sig_inter_cross_surface_improv_x,p_sidewlks_widen_existing_x,a4_row_100,a4_row_gov_ease,a4_row_private_ease,v_other_traffic_calming_imprv_1_x,v_speed_feedback_signs_x,v_other_traffic_calming_qty_1_x,v_other_traffic_calming_imprv_2_x,v_other_traffic_calming_qty_2_x,v_remove_right_turn_pocket_x,v_remove_travel_ln_x,v_sig_inter_new_roundabout_x,v_sig_inter_timing_improv_x,v_un_sig_inter_new_traf_sig_x,v_un_sig_inter_new_roundabout_x,app_fk,details_datetime_stamp,a4_reg_init_x,a4_reg_init_pct_x,a4_com_init_x,a4_com_init_pct_x,a4_safe_route_x,a4_safe_route_pct_x,a4_fl_mile_x,a4_fl_mile_pct_x,a4_emp_based_x,a4_emp_based_pct_x,a4_other_ni_x,a4_other_ni_descr_x,a4_other_ni_pct_x,a4_wb_audits_x,a4_bike_classes_x,a4_ped_classes_x,a4_demo_events_x,a4_com_enc_x,a4_le_methods_x,a4_com_meetings_x,a4_classrooms_x,a4_school_assem_x,a4_after_school_x,a4_bike_rodeos_x,a4_mock_cities_x,a4_walk_bus_x,a4_bike_train_x,a4_com_challenges_x,a4_srts_enc_x,a4_srts_le_x,a4_srts_training_x,a4_act_other_1_x,a4_act_other_1_descr_x,a4_act_other_2_x,a4_act_other_2_decr_x,a4_comm_trad_media_x,a4_comm_large_media_x,a4_comm_print_x,a4_comm_social_x,a4_comm_web_x,a4_comm_other_x,a4_comm_other_descr_x,a4_comm_language_x,a4_collab_pub_health_x,a4_collab_le_x,a4_collab_non_profit_x,a4_collab_schools_x,a4_collab_pub_works_x,a4_collab_other_x,a4_colab_other_descr_x,a4_plan_ped_x,a4_plan_bike_x,a4_plan_atp_x,a4_plan_school_routes_x,a4_row_open_street_demo_x,assembly_district_x,congressional_district_x,senate_district_x,#_x,atp_id,awarded_x,ppno_x,ppno_1_x,data_origin_x,geometry,awarded_y,#_y,atp_id_x,ppno_1_y,a3_proj_type_x,a2_county_y,a1_locode_y,a1_imp_agcy_street_y,a1_imp_agcy_city_y,a1_imp_agcy_zip_y,a1_imp_agcy_title_y,a1_imp_agcy_ma_y,a1_imp_agcy_state_ma_num_y,a1_imp_agcy_fed_ma_num_y,a1_proj_partner_exists_y,a1_proj_partner_agcy_y,a1_proj_partner_title_y,assembly_district_y,a2_assem_dist_a_y,a2_assem_dist_b_y,a2_assem_dist_c_y,congressional_district_y,a2_congress_dist_a_y,a2_congress_dist_b_y,a2_congress_dist_c_y,senate_district_y,a2_senate_dist_a_y,a2_senate_dist_b_y,a2_senatedistc_y,a2_info_proj_descr_y,a2_info_proj_loc_y,a2_mop_uza_population_y,a2_mpo_y,a2_past_proj_y,a2_past_proj_qty_y,a2_proj_lat_y,a2_proj_long_y,a2_proj_scope_summary_y,a2_project_location_map_y,a2_rtpa_y,a3_plan_active_trans_y,a3_plan_active_trans_exists_y,a3_plan_bicycle_y,a3_plan_bicycle_exists_y,a3_plan_ped_y,a3_plan_ped_exists_y,a3_plan_srts_y,a3_plan_srts_exists_y,a3_st_bicycle_applies_y,a3_st_bicycle_pct_y,a3_st_num_schools_y,a3_st_ped_applies_y,a3_st_ped_pct_y,a3_st_srts_y,a3_trail_elig_cost_y,a3_trail_fed_funding_y,a3_trail_trans_pct_y,a3_current_plan_y,a3_trails_y,a3_plan_none_y,a3_plan_other_y,a3_plan_other_desc_y,a2_output_outcome_y,b_sig_inter_new_bike_boxes_y,b_class_1_y,b_class_2_y,b_class_3_y,b_class_4_y,a4_bike_gap_pct_y,b_light_intersection_y,b_mid_block_new_rrfb_signal_y,b_mid_block_surf_improv_y,b_bsp_new_bikes_y,b_bike_new_secured_lockers_y,b_bike_new_racks_y,b_bsp_new_station_y,b_other_bike_improv_1_y,b_other_bike_improv_qty_1_y,b_other_bike_improv_2_y,b_other_bike_improv_qty_2_y,b_light_rdwy_seg_y,b_sig_inter_timing_improv_y,b_un_sig_new_rrfb_signal_y,b_un_sig_cross_surf_improv_y,a4_easement_support_y,m_cls_1_trails_widen_recon_exist_y,m_cls_1_trails_new__less_than_9_y,m_cls_1_trails_new_over_9_y,m_non_cls_trails_new_y,m_other_trail_imprv_1_y,m_other_trail_improv_qty_1_y,m_other_trail_imprv_2_y,m_other_trail_improv_qty_2_y,m_non_cls_widen_recon_exist_y,p_amenities_bench_y,a4_ped_gap_pct_y,p_mid_block_cross_new_rrfb_signal_y,p_light_intersection_y,p_lighting_rdwy_seg_y,p_mid_block_cross_surf_improv_y,p_new_ada_ramp_y,p_sidewlks_new_barrier_protect_y,p_sidewlks_new_4_to_8_y,p_sidewlks_new_over_8_y,p_other_ped_imprv_1_y,p_other_ped_qty_1_y,p_other_ped_imprv_2_y,p_other_ped_qty_2_y,p_reconstruct_ramp_to_ada_stand_y,p_sidewlks_reconstruct_enhance_exist_y,p_sig_inter_enhance_exist_crosswlk_y,p_sig_inter_new_crosswlk_y,p_sig_inter_ped_heads_y,p_sig_inter_shorten_cross_y,p_sig_inter_timing_improv_y,p_amenities_trash_can_y,p_amenities_shade_tree_y,p_amenities_shade_tree_type_y,p_un_sig_inter_new_traff_sig_y,p_un_sig_inter_new_roundabout_y,p_un_sig_inter_new_rrfb_sig_y,p_un_sig_inter_shorten_cross_y,p_un_sig_inter_cross_surface_improv_y,p_sidewlks_widen_existing_y,agency_fully_own_r_w,"require_r_w_easement,_from_gov",_require_rw_easement_from_private,v_other_traffic_calming_imprv_1_y,v_speed_feedback_signs_y,v_other_traffic_calming_qty_1_y,v_other_traffic_calming_imprv_2_y,v_other_traffic_calming_qty_2_y,v_remove_right_turn_pocket_y,v_remove_travel_ln_y,v_sig_inter_new_roundabout_y,v_sig_inter_timing_improv_y,v_un_sig_inter_new_traf_sig_y,v_un_sig_inter_new_roundabout_y,a4_reg_init_y,a4_reg_init_pct_y,a4_com_init_y,a4_com_init_pct_y,a4_safe_route_y,a4_safe_route_pct_y,a4_fl_mile_y,a4_fl_mile_pct_y,a4_emp_based_y,a4_emp_based_pct_y,a4_other_ni_y,a4_other_ni_descr_y,a4_other_ni_pct_y,a4_wb_audits_y,a4_bike_classes_y,a4_ped_classes_y,a4_demo_events_y,a4_com_enc_y,a4_le_methods_y,a4_com_meetings_y,a4_classrooms_y,a4_school_assem_y,a4_after_school_y,a4_bike_rodeos_y,a4_mock_cities_y,a4_walk_bus_y,a4_bike_train_y,a4_com_challenges_y,a4_srts_enc_y,a4_srts_le_y,a4_srts_training_y,a4_act_other_1_y,a4_act_other_1_descr_y,a4_act_other_2_y,a4_act_other_2_decr_y,a4_comm_trad_media_y,a4_comm_large_media_y,a4_comm_print_y,a4_comm_social_y,a4_comm_web_y,a4_comm_other_y,a4_comm_other_descr_y,a4_comm_language_y,a4_collab_pub_health_y,a4_collab_le_y,a4_collab_non_profit_y,a4_collab_schools_y,a4_collab_pub_works_y,a4_collab_other_y,a4_colab_other_descr_y,a4_plan_ped_y,a4_plan_bike_y,a4_plan_atp_y,a4_plan_school_routes_y,a4_row_open_street_demo_y,project_status,solicitation_abv,solicitation,soliciting_agency,atp_id_y,ppno_y,ppno1,oversight_id,a3_proj_type_y,project_size,total_project_cost,a1_imp_agcy_name_y,a2_info_proj_name_y,original_prog__amt___pa_ed_,orig__prog__year__pa_ed_,original_prog__amt___ps_e_,orig__prog__year__ps_e_,original_prog__amt___rw_,orig__prog__year__rw_,orignal_prog__amt___con_,orig__prog__year__con_,original_prog__amt___con_ni_,orig__prog__year__con_ni_,total,_2122_pa_ed,_2122_ps_e,_2122_rw,_2122_con,_2122_con_ni,_2122_total,fund_year_1,_2223_pa_ed,_2223_ps_e,_2223_rw,_2223_con,_2223_con_ni,_2223_total,fund_year_2,_2324_pa_ed,_2324_ps_e,_2324_rw,_2324_con,_2324_con_ni,_2324_total,fund_year_3,_2425_pa_ed,_2425_ps_e,_2425_rw,_2425_con,_2425_con_ni,_2425_total,fund_year_4,total_atp_$,match_total_atp_$_and_total,match_total_atp_$_and_total_atp_x_1000,paed,ps_e_,rw,con,con_ni,total_atp__000s_,total_atp_x_1000,year__pa_ed_,year__ps_e_,year__rw_,year__con_,year__con_ni_,data_origin_y,full_merge,compare_desc,compare_proj_desc
0,Merced,10-5939R,Yes,Merced County,00033S,345 west 7th street,Deputy Director,95340.00,,,No,,21,,,16,,,Merced,10.00,"PA&ED, PS&E, and CON funding for construction ...",1) South side of Haskell Ave from Cody ave to ...,Planada Sidewalk Infill Project,Project is located outside one of the ten larg...,MCAG,No,0.00,37.29,120.31,The Planada Sidewalk Infill Project is located...,,,12,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,20.00,1.00,Yes,80.00,Yes,,,0.00,No,1.00,1802.00,Planada Sidewalk infill ATP cross section 1.pdf,Attachment A- Signature Page.pdf,Existing Photos Attachment.pdf,Planada ATP Plan Concept.pdf,Attachment-B-Engr-Checklist (MH).pdf,,Letters of Support.pdf,,,Project Estimate.pdf,,2020-06-09 10:33:08,10-Merced County-1,5.00,5939,No,No,,Sidewalk infill along portions of Haskell aven...,No,,,,1500.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00,,,,,6.00,,1500.00,,,,,,5.00,,4.00,3.00,,,,,,,,,,,,,No,No,Yes,,,,,,,,,,,,1802.00,2020-06-09 10:33:08,N,0.00,N,0.00,N,0.00,N,0.00,,0.00,N,,0.00,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,21,16,12,,,N,,,Application,POINT (120.31282 37.29159),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
1,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702.00,,,No,,69,,,46,,,Orange,12.00,Bishop Street Class 3 Bicycle Boulevard with T...,Bishop Street from Flower Street to Standard A...,Bishop Street Bicycle Boulevard Project,Project is located within one of the ten large...,SCAG,Yes,2.00,33.74,117.86,This project will implement a Class 3 bicycle ...,,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Medium,Yes,50.00,0.00,Yes,50.00,No,,,0.00,No,4.00,1811.00,Attachment K - Not Applicable.pdf,Attachment A - Signature Page.pdf,Attachment E - Photos of Existing Conditions.pdf,Attachment D - Project .Plans.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost .Estimate.pdf,,2020-08-20 18:49:12,"12-Santa Ana, City of-4",5.00,5063,No,No,,"Install 1.15 mile bike boulevard, construction...",Yes,,,,6336.00,,,,,,,,,,,,,,,2.00,,,,,,,,,,,,,,100.00,,,,,,,,,,,,,38.00,,15.00,16.00,,18.00,3.00,,,,1.00,6.00,,18.00,,,Yes,No,No,,,,,,,8800.00,,,,,1811.00,2020-08-20 18:49:12,N,0.00,N,0.00,N,0.00,N,0.00,,0.00,N,,0.00,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.86443 33.73947),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
2,City of Pacifica,04-5350-F15,Yes,"Pacifica, City of",,151 Milagra Drive,Associate Civil Engineer,94044.00,,,No,,22,,,14,,,San Mateo,4.00,CON funding for installing bicycling facilitie...,On Palmetto Ave between Paloma Ave and West Av...,Palmetto Ave - Esplanade Ave Bicycle & Pedestr...,Project is located outside one of the ten larg...,MTC,No,0.00,37.65,-122.49,The project will install a combination of Clas...,,,13,,,,No,,Yes,,Yes,,No,Infrastructure - Small,Yes,50.00,2.00,Yes,50.00,No,,,0.00,No,1.00,1804.00,,Attachment-A-Signature-page.pdf,Photos.pdf,Attachment D_Palmetto & Esplanade Ped-Bike Imp...,Attachment B_Engineers Checklist.pdf,,Letters of Support.pdf,,,Attachment F_ ATP Cycle 5_Palmetto-Esplanade B...,,2020-06-15 11:05:03,"4-Pacifica, City of-1",5.00,5350,0,0,,Bicycling and pedestrian amenities will be ins...,Yes,,,13752.00,5748.00,,,,,,,,,,,,,,,,1.00,,,,,,,,,,,,,40.00,2.00,,,,20.00,,,,,,,,9.00,,,,,,,,,,,,,,,,Yes,No,No,,,,,,,,,,,,1804.00,2020-06-15 11:05:03,N,0.00,N,0.00,N,0.00,N,0.00,,0.00,N,,0.00,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,22,14,13,,,N,,,Application,POINT (-122.49178 37.64730),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
3,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702.00,,,No,,69,,,46,,,Orange,12.00,Pedestrian traffic safety improvements for Jef...,"In the City of Santa Ana, the safe routes to s...",Jefferson ES_Thorpe Fundamental_McFadden Int_G...,Project is located within one of the ten large...,SCAG,Yes,2.00,33.71,117.89,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5.00,Yes,100.00,Yes,,,0.00,No,13.00,1822.00,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D -Plans.pdf,Attachment B - Check list.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-09-08 10:15:52,"12-Santa Ana, City of-13",5.00,5063,No,No,,"Construct curb extensions at 8 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50.00,,,,,60.00,,,,Left Turn Arrow,3.00,Enhanced Crosswalk Unsignalized,3.00,218.00,1000.00,7.00,,,1.00,,,,,,,,7.00,,,Yes,No,No,,,,,,,,,,,,1822.00,2020-09-08 10:15:52,N,0.00,N,0.00,N,0.00,N,0.00,,0.00,N,,0.00,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.89494 33.71126),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
4,Santa Ana,12-5063,Yes,"Santa Ana, City of",00289S,"20 Civic Center Plaza, M-43",Senior Civil Engineer,92702.00,,,No,,69,,,46,,,Orange,12.00,Pedestrian traffic safety improvements for La...,"In the City of Santa Ana, the safe routes to s...",Lathrop Intermediate_Lowell ES_Martin ES_Pio P...,Project is located within one of the ten large...,SCAG,Yes,4.00,33.73,117.87,"This project will be repairing, replacing and ...",,,34,,,,Yes,,Yes,,No,,Yes,Infrastructure - Large,No,,5.00,Yes,100.00,Yes,,,0.00,No,14.00,1823.00,Attachment K.pdf,Attachment A.pdf,Attachment E - Photos.pdf,Attachment D - Plan.pdf,Attachment B - Checklist.pdf,,Attachment I - Letter of Support.pdf,,Attachment G - Not Applicable.pdf,Attachment F - Cost Estimate.pdf,,2020-08-31 12:34:31,"12-Santa Ana, City of-14",5.00,5063,No,No,,"Construct curb extensions at 6 intersections, ...",Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50.00,,,,,43.00,,,,Enhance crosswalk (unsignalized),7.00,Raised Crosswalk,2.00,189.00,3455.00,5.00,,,1.00,,,,,,,2.00,5.00,,,Yes,No,No,,,,,,,,,,,,1823.00,2020-08-31 12:34:31,N,0.00,N,0.00,N,0.00,N,0.00,,0.00,N,,0.00,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,69,46,34,,,N,,,Application,POINT (117.86884 33.73240),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
879,San Jose,04-5005F15,Yes,"San Jose, City of",00200S,200 E Santa Clara St,Senior Transportation Specialist,95113.00,,,No,,25,,,19,,,Santa Clara,4.00,This project will decouple 2nd and 3rd street ...,The project is in SoFA arts district in southw...,2nd & 3rd Street De-Coupling and Complete Stre...,Project is located within one of the ten large...,MTC,Yes,1.00,,,"The City of San José, through its Downtown Tra...",,,15,,,,No,,Yes,,No,,No,Infrastructure - Large,Yes,60.00,0.00,Yes,40.00,No,,,0.00,No,3.00,3860.00,attachment k.pdf,Attachment-A-Signature-Page (1)_jr (1).pdf,Attachment_G_Site_Photos.pdf,2_3DESIGNS.pdf,Attachment-B-Engr-Checklist- 2nd and 3rd.pdf,,LOS.pdf,,,2nd and 3rd ATP Engineers Estimate_Final.pdf,,2022-06-16 10:00:19,"4-San Jose, City of-3",6.00,5005,0,0,"Emerging mobility Action Plan, Carbon Neutral ...",Project constructs approximately 6840 feet of ...,Yes,9.00,,,,6840.00,,,,,,,,,Bike Ramps,8.00,Raised Intersections,6.00,,,,4.00,,,,,,,,,,,,2.00,,,,,,,,,Fully Bulbed (all 4 corners),60.00,,,48.00,6840.00,8.00,,,2.00,,,,,,,,4.00,4.00,6840.00,Yes,No,No,Conversion of 1 to 2 way operation,,,<---- 0.68 miles,,,,,,,,3860.00,2022-06-16 10:00:19,N,0.00,N,0.00,N,0.00,N,0.00,,,N,,0.00,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,N,N,No,25,19,15,,,N,,,Application,POINT EMPTY,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
880,Santa Barbara,05-5951R,Yes,Santa Barbara County,00100S,123 E. Anapuma St,Alternative Transportation Manager,93101.00,,,No,,,3,7,,2,4,Santa Barbara,5.00,"Curb extensions, sidewalks and crosswalks for ...",Unincorporated neighborhood located south of E...,Isla Vista Bike and Pedestrian Improvements Pr...,Project is located outside one of the large MP...,SBCAG,Yes,2.00,,,"Isla Vista is a place like no other. 15,733 pe...",,,,1,9,,Yes,,No,,No,,No,Infrastructure + NI - Medium,Yes,25.00,1.00,Yes,75.00,Yes,,,0.00,No,2.00,3845.00,,Attachment A_Signature Page - 2022.pdf,Existing Conditions Photos.pdf,Isla Vista Community Improvements - ATP Cycle ...,Attachment B-Engr Checklist IV.pdf,,Attachment I - Letters of Support 2022.pdf,,Attachment-G-Exhibit-25-R-NI-Work-Plan - Isla ...,Attachment-F-Project-Estimate-IV Updated.pdf,,2022-06-16 12:25:34,5-Santa Barbara County-2,6.00,5951,0,0,Regional Transportation Plan,"Curb extensions, sidewalks, and bicycle networ...",Yes,2.00,,1700.00,22410.00,,,,,,,,,,Bike left-hand turn lanes,10.00,Class 2 conflict / intersection striping,24.00,,,,,,,,,,,,,,,,20.00,1.00,,,1.00,110.00,,2820.00,,,,,,23.00,,,,,,,,,,,,,42.00,17.00,2700.00,No,No,Yes,,,,,,,,,,,,3845.00,2022-06-16 12:25:34,Y,50.00,N,0.00,Y,50.00,N,0.00,,,N,,0.00,2.00,4.00,4.00,,,,,,,4.00,2.00,,,,,,,,,,,,N,N,Y,Y,N,N,,"Spanish, Mandarin",Y,N,Y,N,N,N,,N,N,N,N,No,37,24,19,,,N,,,Application,POINT EMPTY,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
881,Salinas,056143R,Yes,Transportation Agency for Monterey County,74A0797,55-B Plaza Circle,Transportation Planner,93901.00,FORTAG LOS-City of Marina.pdf,City of Marina,Yes,Public Works Director/City Engineer,30,,,20,,,Monterey,5.00,Construction of a 1.8 mile segment of the plan...,FORTAG Segment 3 is located in the City of Mar...,Fort Ord Regional Trail and Greenway: Californ...,Project is located outside one of the large MP...,AMBAG,Yes,3.00,,,The Fort Ord Regional Trail and Greenway (FORT...,,TAMC,17,,,,Yes,,No,,No,,Yes,Infrastructure + NI - Large,Yes,50.00,4.00,Yes,50.00,Yes,,Yes,100.00,Yes,1.00,3816.00,TAMC_FORTAG_Attachment_K (School Health Data).pdf,FORTAG_CaliforniaAve_Attachment_A (Signed).pdf,FORTAG California Ave. Photos of Existing Cond...,FORTAG_CaliforniaAve_Attachment_D (Project Pla...,FORTAG_CaliforniaAve_Attachment_B (Engineeers ...,,FORTAG_CaliforniaAve_Attachment_I (Letters of ...,,Attachment-G-Exhibit-25-R-NI-Work-Plan_FORTAG.pdf,Attachment-F-Project-Estimate - FORTAG Califor...,,2022-06-23 13:00:02,5-Transportation Agency for Monterey County-1,6.00,6143,0,0,,Construct a 1.8 mile segment of FORTAG in the ...,Yes,,,,,,,,,,,,,,,,,,,,,,,,,6811.00,,,,,,,10.00,100.00,,,,,8.00,,,,,,,,2.00,,4.00,,,4.00,4.00,5.00,,,,,,,,,Yes,No,No,,,,,,,,,,,,3816.00,2022-06-23 13:00:02,N,0.00,Y,30.00,Y,70.00,N,0.00,,,N,,0.00,,12.00,32.00,,,,,40.00,12.00,,16.00,12.00,1.00,,,,,8.00,4.00,Basic bike maintenance workshops,8.00,Family group rides,Y,N,Y,Y,Y,N,,Spanish,Y,Y,Y,Y,Y,Y,Regional Transportation Planning Agency,N,N,N,N,No,30,20,17,,,N,,,Application,POINT EMPTY,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,False,False
882,,,,,,,,,,,,,,,,,,,,7.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,"7-Maywood, City of-1",5.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,ATP5-07-021S,,Plan,LA,5265,4319 Slauson Ave,City of Maywood,90270.00,Vasquez,No,,,No,,,63,0.00,6.00,3.00,40,40.00,,,33,33.00,,,The City of Maywood will develop an Active Tra...,City of Maywood,Project is located within one of the ten large...,SCAG,No,0.00,33.99,-118.19,The City of Maywood will develop an Active Tra...,,,,No,,No,,No,,No,Yes,50.00,8.00,Yes,50.00,Yes,0.00,,,No,,Yes,No,,The outcome will include the delivery of activ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,0.00,N,0.00,N,0.00,N,0.00,,0.00,N,,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,,0.00,,N,N,N,N,N,N,,,N,N,N,N,N,N,,N,N,Y,N,No,Active,S,STATEWIDE SOLICITATION-CYCLE 5,CALTRANS,ATP5-7-021S,5860,,Standard,Plan,,263000.00,"Maywood, City of",City of Maywood Active Transportation Plan,,,,,,,,,263000.00,21/22,263000.00,,,,,263000.00,263000.00,21/22,,,,,,0.00,22/23,,,,,,0.00,23/24,,,,,,0.00,24/25,263000.00,YES,YES,0.00,0.00,0.00,0.00,263.00,263.00,263000.00,,,,,21/22,Funded,right_only,False,False


In [97]:
## list of all columns
#col_list = sorted(dfall.columns.to_list())

In [98]:
#col_list

In [99]:
#dfall.columns.get_loc("a3_current_plan_x")

In [100]:
# (dfall>>select(_.a3_proj_type, 
#                _.a3_proj_type_x,
#               _.a3_proj_type_y)).info()

remove cols: 
* a3_proj_type_x

In [101]:
(dfall>>select(_.a3_proj_type, 
               _.a3_proj_type_x,
              _.a3_proj_type_y))>>arrange(_.a3_proj_type_y)

Unnamed: 0,a3_proj_type,a3_proj_type_x,a3_proj_type_y
35,Infrastructure + NI - Large,Infrastructure + NI - Large,Combined (IF and NI)
60,Infrastructure + NI - Large,Infrastructure + NI - Large,Combined (IF and NI)
66,Infrastructure + NI - Small,Infrastructure + NI - Small,Combined (IF and NI)
77,Infrastructure + NI - Small,Infrastructure + NI - Small,Combined (IF and NI)
96,Infrastructure + NI - Small,Infrastructure + NI - Small,Combined (IF and NI)
...,...,...,...
877,Infrastructure - Small,,
878,Infrastructure - Small,,
879,Infrastructure - Large,,
880,Infrastructure + NI - Medium,,


In [102]:
#(dfall.iloc[:,64:70]).info()