In [1]:
import pandas as pd 
import sqlalchemy 
import sys 
import re
import oracledb 

In [2]:
# Make the python-oracledb driver stand in for cx_Oracle: give the module a
# `version` attribute and register it in sys.modules under the name
# "cx_Oracle", so any later `import cx_Oracle` resolves to oracledb.
# NOTE(review): presumably needed because the installed SQLAlchemy Oracle
# dialect imports cx_Oracle and checks its version — confirm against the
# SQLAlchemy version in use before removing.
oracledb.version = "8.3.0" 
sys.modules["cx_Oracle"] = oracledb 

In [3]:
# Display settings for this notebook: show up to 100 columns, render floats
# with two decimals, and never truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [5]:
DIALECT = 'oracle'  

In [6]:
# SQLAlchemy connection URL for the Oracle database, addressed by service name.
# NOTE(review): USERNAME, PASSWORD, HOST, PORT and SERVICE are not defined in
# any cell shown here (execution count In [4] is missing), so this cell fails on
# a fresh kernel unless they are set first — ideally from environment variables
# rather than hard-coded values.
# NOTE(review): the name says WIN_AUTH, but the URL carries a username and
# password; values are interpolated as-is, so special characters such as
# "@" or ":" in the password would need URL-escaping.
ENGINE_PATH_WIN_AUTH =  f"{DIALECT}://{USERNAME}:{PASSWORD}@{HOST}:{PORT}/?service_name={SERVICE}" 

In [7]:
engine = sqlalchemy.create_engine(ENGINE_PATH_WIN_AUTH)   

In [8]:
def to_snakecase(df):
    """
    Lower-case every column name and replace each space with an underscore.

    The columns of `df` are renamed in place and the same DataFrame object is
    returned, so the call can be used inline or as a reassignment.
    """
    lowered = df.columns.str.lower()
    df.columns = lowered.str.replace(' ', '_')
    return df

## Projects
* Each project has a unique row
* Use status_code to filter only for active projects

In [9]:
# Pull the projects whose status_code is 'Active', together with lookup text
# for agency, county, work type and project category.
# The LEFT JOINs keep a project even when it has no matching row in a lookup
# table; the looked-up columns are then NULL for that project.
# NOTE(review): the recorded outputs further down list a `project_planning_id`
# column that this SELECT does not request — confirm whether it was removed on
# purpose or the outputs are stale.
projects_df = pd.read_sql_query(""" 
SELECT 
projects.project_id,
projects.county_code,
projects.comment_desc,
projects.district_code, 
projects.est_total_prj_costs,
projects.location_name,
projects.project_label_name,
projects.original_post_mile_begin_id,
projects.original_post_mile_end_id,
projects.revised_post_mile_begin_ind,
projects.revised_post_mile_end_ind,
projects.route_name,
projects.state_hwy_ind,
projects.senate_district_code,
local_agencies.agency_name,
local_agencies.urban_area_code,
counties.county_name,
work_types.work_type_desc,
project_category_type_codes.category_desc
FROM projects 
LEFT JOIN local_agencies ON projects.agency_code = local_agencies.agency_code
LEFT JOIN counties ON projects.county_code = counties.county_code
LEFT JOIN work_types ON projects.work_type_code = work_types.work_type_code
LEFT JOIN project_category_type_codes ON projects.project_category_type_code = project_category_type_codes.category_code
WHERE projects.status_code = 'Active'
""", engine) 

In [11]:
# Where a project has no comment, fall back to its category description.
projects_df["comment_desc"] = projects_df["comment_desc"].fillna(projects_df["category_desc"])

In [12]:
# Still no comment? Fall back to the work type description.
projects_df["comment_desc"] = projects_df["comment_desc"].fillna(projects_df["work_type_desc"])

In [13]:
# The two description columns were only needed as fallbacks for comment_desc.
projects_df = projects_df.drop(['category_desc', 'work_type_desc'], axis="columns")

In [14]:
projects_df['current_phase'] = 'single phase'

In [15]:
projects_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11645 entries, 0 to 11644
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   project_id                   11645 non-null  object 
 1   county_code                  11640 non-null  object 
 2   comment_desc                 10766 non-null  object 
 3   district_code                11644 non-null  object 
 4   est_total_prj_costs          976 non-null    float64
 5   location_name                11277 non-null  object 
 6   project_label_name           10791 non-null  object 
 7   project_planning_id          2048 non-null   object 
 8   original_post_mile_begin_id  750 non-null    float64
 9   original_post_mile_end_id    576 non-null    float64
 10  revised_post_mile_begin_ind  20 non-null     object 
 11  revised_post_mile_end_ind    15 non-null     object 
 12  route_name                   11324 non-null  object 
 13  state_hwy_ind   

In [16]:
projects_df.sample(3)

Unnamed: 0,project_id,county_code,comment_desc,district_code,est_total_prj_costs,location_name,project_label_name,project_planning_id,original_post_mile_begin_id,original_post_mile_end_id,revised_post_mile_begin_ind,revised_post_mile_end_ind,route_name,state_hwy_ind,senate_district_code,agency_name,urban_area_code,county_name,current_phase
1070,5006(791),5953,Unique Project ID; HSIP6-07-017\n\n08/1/18 3 LOCATIONS ONLY at CON,7,,"Olympic Blvd/Orme Ave, Olympic Blvd/Camulos St, Olympic Blvd/S Dacotah St",Install Activated Pedestrian Warning Devices (APWDs); construct bulb-outs,,,,,,0-LA,N,,Los Angeles,3041.0,Los Angeles County,single phase
3538,5108(017),5953,inactive,7,,ORANGE AVE.: PACIFIC COAST HWY. TO 20TH ST.,RESURF./RECON PAVEMENT/WIDEN/SIGNALS,,0.0,0.0,,,0-LBCH,N,,Long Beach,3041.0,Los Angeles County,single phase
10375,15J7(021),5936,LTP-SCRCO-018\n\nEO only,5,892755.0,Soquel San Jose PM 6.59,Road Reconstruction,,,,,,0-CR,N,,Santa Cruz County,,Santa Cruz County,single phase


## EA Number
* Projects can have multiple EA numbers.
* Should we keep the most recent EA or all of them -> Ask Tony Hunt.

In [17]:
# Load every expense authorization (EA) record. No WHERE clause is applied, so
# this is not limited to active projects; narrowing to the projects of interest
# happens in a later merge on district_code and project_id.
ea_df = pd.read_sql_query(""" 
SELECT 
project_id, 
district_code,
ea_assign_date, 
expense_authorization_id 
FROM expense_authorizations
""", engine) 

In [18]:
ea_df.shape, ea_df.project_id.nunique()

((49431, 4), 24130)

In [19]:
# Do an outer join to understand what's going on under the hood
# outer_join = pd.merge(ea_df, projects_df4, on = ['district_code','project_id'], how = "outer", indicator = True)

In [20]:
# outer_join[['_merge']].value_counts()

In [21]:
# Understand why there are more rows compared to project_ids that are unique
# outer_join.loc[outer_join._merge == "both"][['project_id']].nunique()

In [22]:
# Inner merge: keep only EA rows that belong to a project in projects_df
ea_df = projects_df.merge(ea_df, how="inner", on=['district_code', 'project_id'])

In [23]:
ea_og_cols = ['district_code', 'expense_authorization_id', 'project_id',
      'ea_assign_date']

In [24]:
# Keep only original columns 
ea_df = ea_df[ea_og_cols]

In [25]:
len(ea_df)

3030

In [26]:
ea_df.project_id.value_counts().describe()

count   2944.00
mean       1.03
std        0.18
min        1.00
25%        1.00
50%        1.00
75%        1.00
max        3.00
Name: project_id, dtype: float64

In [27]:
ea_df.project_id.nunique()

2944

In [28]:
ea_df.project_id.value_counts().head()

5006(504)    3
5953(536)    3
5932(042)    3
5006(635)    3
5008(072)    3
Name: project_id, dtype: int64

In [29]:
ea_df.loc[ea_df.project_id == '5006(635)']

Unnamed: 0,district_code,expense_authorization_id,project_id,ea_assign_date
2381,7,4S6608,5006(635),2009-09-10 13:58:44
2382,7,933575,5006(635),2009-07-02 14:46:18
2383,7,4U4414,5006(635),2009-09-10 13:56:35


In [30]:
# Keep only the most recent EA number so only one EA number per project??
# Sorting newest-first means the first row kept for each
# (project_id, district_code) pair is the latest assignment.
ea_df2 = (
    ea_df
    .sort_values(by='ea_assign_date', ascending=False)
    .drop_duplicates(['project_id', 'district_code'], keep='first')
    .drop('ea_assign_date', axis=1)
    .reset_index(drop=True)
)

In [31]:
ea_df2.project_id.nunique()

2944

In [32]:
ea_df2.head()

Unnamed: 0,district_code,expense_authorization_id,project_id
0,4,1Q7614,6204(135)
1,4,985981,6480(026)
2,4,985980,5933(171)
3,9,955175,6142(034)
4,4,985979,5178(016)


In [33]:
ea_df2.expense_authorization_id.nunique()

2873

#### The same EA number matches to multiple projects
* Understand why this is happening
* It seems like the same EA number matches multiple projects that have nothing in common.

In [34]:
#ea_df.loc[ea_df.expense_authorization_id == "924969"]

In [35]:
#ea_df.loc[ea_df.expense_authorization_id == "924360"]

In [36]:
#ea_df2.project_id.nunique() == len(ea_df2)

In [37]:
#len(projects_df6) == len(projects_df)

* Shares the EA of 924360

In [38]:
#projects_df6.loc[projects_df6.project_id == '0061(025)'][preview_cols]

## EFIS_MV_BUD_STRU_94_LVL_3_VW

In [39]:
# Load budget (curr_bud_am) and cash expenditure (cash_exp_am) rows from the
# EFIS view, keeping only rows whose pec_code contains '2030'.
# NOTE(review): presumably '2030' identifies the Local Assistance program
# family of PEC codes — confirm with the PEC/PECT lookup.
efis_df = pd.read_sql_query(""" 
SELECT 
adv_project_id,
fund_code,
pec_code,
appropriation_category_code,
curr_bud_am,
cash_exp_am,
pect_task_code
FROM EFIS_MV_BUD_STRU_94_LVL_3_VW
WHERE pec_code LIKE '%2030%'
""", engine) 

In [40]:
efis_df.shape

(45133, 7)

In [41]:
efis_df.adv_project_id.nunique()

19497

### Efis Join

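* In SQL, filtering out status codes that contain a 9 also eliminates rows whose status is NULL, which means newer projects or projects without a status are eliminated as well. To avoid that, the filter is applied in pandas after the nulls are filled in.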


In [42]:
# Crosswalk between the EFIS adv_project_id and project_id, plus the project
# status code. It is loaded unfiltered; the status filtering is done in pandas
# in the following cells.
efis_join_df = pd.read_sql_query(""" 
SELECT 
adv_project_id,
project_id,
project_status_code
FROM EFIS_MV_R_PROG_VW 
""", engine) 

* Exclude the project status because it only reflects the financial (accounting) status of the project, not its construction status.

In [43]:
#project_status = pd.read_sql_query(""" 
#SELECT DISTINCT project_status_code, 
#project_status
#FROM ACCOUNTING_EXP_CWA_VW  
#""", engine) 

In [44]:
# project_status

In [45]:
# Missing status codes get an explicit label so the string filter that
# follows does not trip over NaN
efis_join_df["project_status_code"] = efis_join_df["project_status_code"].fillna('no status')

In [46]:
# A status code containing '9' means the project is closed: drop those rows
efis_join_df2 = efis_join_df[~efis_join_df["project_status_code"].str.contains('9')]

In [47]:
# The status code only reflects the Accounting point of view, so it is not
# carried any further once the closed projects are filtered out
efis_join_df2 = efis_join_df2.drop('project_status_code', axis=1)

In [48]:
pd.merge(efis_df, efis_join_df2, on = ['adv_project_id'], how = 'outer', indicator = True)[['_merge']].value_counts()

_merge    
left_only     33921
both          11227
right_only       88
dtype: int64

In [49]:
# Attach project_id to each EFIS budget row; rows without a match are dropped.
efis_m1 = efis_df.merge(efis_join_df2, how='inner', on='adv_project_id')

In [50]:
efis_join_df2.project_id.nunique()

4891

In [51]:
efis_m1.project_id.nunique()

4818

## Subset only for the project_ids from `Projects`
* Do this before manipulating the data.

In [52]:
projects_df_subset = projects_df[['project_id']]

In [53]:
projects_df_subset.shape

(11645, 1)

In [54]:
pd.merge(efis_m1, projects_df_subset, on = ['project_id'], how = 'outer', indicator = True)[['_merge']].value_counts()

_merge    
both          10566
right_only     7238
left_only       661
dtype: int64

In [55]:
# Keep only the EFIS rows that belong to a project present in projects_df.
accounting_df = efis_m1.merge(projects_df_subset, how='inner', on='project_id')

In [56]:

accounting_df.project_id.nunique()

4407

In [57]:
accounting_df.project_id.value_counts().head()

6211(130)    32
5908(031)    28
6211(131)    27
6053(130)    25
5006(219)    23
Name: project_id, dtype: int64

## Bring in pect_description for `Projects` -> Double Check
* **TODO:** PEC codes that are not supposed to have a corresponding PECT code end up with one after my manipulation. Correct this.
* When a description is duplicated, keep the second duplicate because it is the more recent one.

In [58]:
def load_pec(excel_file:str)-> pd.DataFrame:
    """
    Load the PEC/PECT lookup workbook and tidy it into a merge-ready table.

    Steps:
      1. Read the workbook and snake_case its column names.
      2. Keep only rows flagged with "X" in the '23/24' column.
      3. Take the first remaining row of each `pec` as that PEC's program
         name (assumes the program-level row is listed first — TODO confirm
         against the workbook layout).
      4. Attach the program name to every row of the same `pec`, and where
         the same (pec, description, program) occurs more than once keep the
         row with the highest `pect`.
      5. Strip the dots from `pec` and turn `pect` into an integer, using 0
         where no PECT is given.

    Args:
        excel_file: path of the Excel workbook. It must contain the columns
            pec, pect, description and the fiscal year columns
            '19/20' ... '23/24'.

    Returns:
        DataFrame with the columns pec, pect, pect_description and program,
        sorted by pec and pect.
    """
    fiscal_year_cols = ['19/20', '20/21', '21/22', '22/23', '23/24']

    df = to_snakecase(pd.read_excel(excel_file))
    # Drop rows that are all nan
    df = df.dropna(how='all').reset_index(drop =  True)
    
    # Keep ONLY rows that have "X" under 23/24
    df2 = df.loc[df['23/24'] == 'X'].reset_index(drop = True)
    
    # Find program ONLY rows: the first row seen for each pec.
    # (No all-NaN rows can exist here because every row has '23/24' == 'X'.)
    program_only = (df2
             .drop_duplicates(subset = ['pec'])
             .reset_index(drop = True)
             .drop(columns = ['pect'] + fiscal_year_cols)
             .rename(columns = {'description':'program'})
            )
    
    m1 = pd.merge(df2, program_only, how = "left", on = ['pec'])
    m1 = (m1
          # Highest pect first, so drop_duplicates keeps it for repeated descriptions
          .sort_values(['pec','pect'], ascending = [True, False])
          .drop_duplicates(subset=['pec', 'description', 'program'])
          .drop(columns = fiscal_year_cols)
          .rename(columns = {'description':'pect_description'})
          .sort_values(['pec','pect'])
          .reset_index(drop = True)
         )

    # regex=False: the dot must be removed literally. The default for `regex`
    # changed between pandas versions, and as a regex '.' matches any character.
    m1.pec = m1.pec.str.replace('.', '', regex=False)
    m1.pect = m1.pect.fillna(0).astype(int)
    return m1

In [59]:
final_pect = load_pec('section2_pect_2023_2024_FY.xlsx')



In [60]:
final_pect.shape

(799, 4)

In [61]:
# final_pect

In [62]:
# Missing task codes become 0 so the column can be an integer merge key.
accounting_df["pect_task_code"] = accounting_df["pect_task_code"].fillna(0).astype(int)

In [63]:
# Look up the PECT description and program for every accounting row.
pect_df = accounting_df.merge(
    final_pect,
    how='left',
    left_on=['pec_code', 'pect_task_code'],
    right_on=['pec', 'pect'],
)

In [64]:
accounting_df.head(1)

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id
0,1449,42,2030010,809,0.0,-38.99,535,6200(024)


In [65]:
pect_df.head(1)

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id,pec,pect,pect_description,program
0,1449,42,2030010,809,0.0,-38.99,535,6200(024),2030010,535.0,Safe Routes to School (SR2S and SRTS),Local Assistance


In [66]:
# Subset 
pect_df2 = pect_df[['pect_description','curr_bud_am', 'project_id']]

In [67]:
len(pect_df2)

10566

In [68]:
# Drop duplicates because we only need one PECT description & project_id combo
len(pect_df2.drop_duplicates())

10368

In [69]:
pect_df2 = pect_df2.drop_duplicates().reset_index(drop = True)

In [70]:
# Pivot so the PECT_description becomes the columns
# curr_bud_sum are just placeholders
# Result: one row per project_id and one column per pect_description, holding
# the summed curr_bud_am. The amounts themselves are not used — the following
# cells turn them into 'Yes'/'No' flags.
# NOTE(review): rows whose pect_description is NaN (no PECT match) presumably
# do not produce a column, so a project with only unmatched rows would be
# missing from the result — confirm.
pect_df3 = pect_df2.pivot_table(index=['project_id'], columns='pect_description', 
                    values=['curr_bud_am'], aggfunc='sum')

In [71]:
pect_df3.columns = pect_df3.columns.droplevel()

In [72]:
pect_df3 = pect_df3.reset_index()

In [73]:
pect_df3 = pect_df3.fillna('No')

In [74]:
pect_df3.sample()

pect_description,project_id,Active Transportation Program (ATP),Bridge Inspection & Scour Evaluation,COVID Relief Funds for Highway Infrastructure Programs for STIP-COVID Augmentation,Carbon Reduction Program (CRP),Congestion Mitigation & Air Quality Improvement Program (CMAQ),Coronavirus Response and Relief Supplemental Appropriations Act (CRRSAA) Funds,Corridor Mobility Improvement Account (CMIA) Program,County Exchange Funds,County State Match Program,"Earmarks Projects (HPP, DEMO CPFCDS, etc.)",Emergency Relief (ER),Ferry Boat Program (FBP) and Ferry Boat Discretionary (FBD) Program,"Funds for Planning, Programming and Monitoring - RIP",General Funded Designated Programs,Hazard Elimination Safety (HES),High Risk Rural Roads Program (HR3),Highway Bridge,Highway Safety Improvement Program (HSIP) (Infrastructure)-State Fund,Highway Safety Improvement Program (HSIP) (Non-Infrastructure),Highway Safety Improvement Program (HSIP)(Infrastructure)-Federal Fund,Local Partnership Program (LPP – Competitive),Local Roads,Local Roads Rehabilitation,Railroad Grade Crossing Protection,Railroad Grade Separations,"Rebuilding American Infrastructure with Sustainability and Equity (RAISE) and Multimodal Project Discretionary Grant Programs (e.g., INFRA, MEGA, RSTG or RURAL)",Regional Improvement Program – Regional Share of STIP Transportation Enhancement (Off System),Regional Surface Transportation Block Grant Program (RSTBGP) and Highway Infrastructure Program (HIP),Regional Transportation Planning Agency (RTPA) STP Match Exchange,SB1 Funded Freeway Service Patrol,SHOPP- Traffic Light Synchronization Program (TLSP)- Proposition 1B Bond Funds,Safe Routes to School (SR2S and SRTS),Set-Aside Coordinated Border Infrastructure (CBI) Program under FAST Act,Solutions for Congested Corridors Program (SCCP),Special Programs,State-Local Partnership Program (SLPP) and Local Partnership Program (LPP-Formulaic),Structures Seismic Retrofit,Trade Corridor Enhancement Account (TCEA) Programs – 
Local Share,Trade Corridor Enhancement Account (TCEA) Programs – State Share,Trade Corridors Improvement Fund (TCIF) Program Local Streets & Roads,Traffic Congestion Relief Program ( TCRP )
3805,6088(082),No,No,No,No,No,No,No,No,No,No,No,No,290000.0,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No


In [75]:
# Change integers to yes 
# Only the program columns are converted: project_id is left out so that an
# identifier which happens to parse as a number is never overwritten with 'Yes'.
pect_flag_cols = [col for col in pect_df3.columns if col != 'project_id']
pect_df3[pect_flag_cols] = pect_df3[pect_flag_cols].mask(
    pect_df3[pect_flag_cols].apply(lambda x : pd.to_numeric(x,errors='coerce')).notnull(),
    'Yes',
)

In [76]:
pect_df3 = to_snakecase(pect_df3)

In [77]:
pect_df3.project_id.nunique()

4356

In [78]:
pd.merge(pect_df3, projects_df, on = ['project_id'], how = 'outer', indicator = True)[['_merge']].value_counts()

_merge    
right_only    7289
both          4356
left_only        0
dtype: int64

In [79]:
pect_code_cols = list(pect_df3.columns)

In [80]:
pect_code_cols.remove('project_id')

In [81]:
# Update projects: add the program Yes/No flags, keeping every project
project_df = projects_df.merge(pect_df3, how='left', on='project_id')

In [82]:
project_df.project_id.nunique()

11645

In [83]:
# Fill in unknown
project_df[pect_code_cols] = project_df[pect_code_cols].fillna('Unknown')

### Double check

In [84]:
pect_df2.loc[pect_df2.project_id == '5918(101)']

Unnamed: 0,pect_description,curr_bud_am,project_id
1677,Highway Bridge,690839.49,5918(101)
1678,"Earmarks Projects (HPP, DEMO CPFCDS, etc.)",238679.79,5918(101)
1679,Regional Surface Transportation Block Grant Program (RSTBGP) and Highway Infrastructure Program (HIP),0.0,5918(101)
1680,Highway Bridge,472887.51,5918(101)


In [85]:
# Spot check one project: colour every cell containing 'Yes' red.
# Styler.where was deprecated in pandas 1.3 and removed in 2.0, so use the
# element-wise Styler.map (pandas >= 2.1) or its older name Styler.applymap.
_check_styler = project_df.loc[project_df.project_id == '5918(101)'].style
_style_cells = getattr(_check_styler, 'map', None) or _check_styler.applymap
_style_cells(lambda val: 'color: red' if 'Yes' in str(val) else '')

Unnamed: 0,project_id,county_code,comment_desc,district_code,est_total_prj_costs,location_name,project_label_name,project_planning_id,original_post_mile_begin_id,original_post_mile_end_id,revised_post_mile_begin_ind,revised_post_mile_end_ind,route_name,state_hwy_ind,senate_district_code,agency_name,urban_area_code,county_name,current_phase,active_transportation_program_(atp),bridge_inspection_&_scour_evaluation,covid_relief_funds_for_highway_infrastructure_programs_for_stip-covid_augmentation,carbon_reduction_program_(crp),congestion_mitigation_&_air_quality_improvement_program_(cmaq),coronavirus_response_and_relief_supplemental_appropriations_act_(crrsaa)_funds,corridor_mobility_improvement_account_(cmia)_program,county_exchange_funds,county_state_match_program,"earmarks_projects_(hpp,_demo_cpfcds,_etc.)",emergency_relief_(er),ferry_boat_program_(fbp)_and_ferry_boat_discretionary_(fbd)_program,"funds_for_planning,_programming_and_monitoring_-_rip",general_funded_designated_programs,hazard_elimination_safety_(hes),high_risk_rural_roads_program_(hr3),highway_bridge_,highway_safety_improvement_program_(hsip)_(infrastructure)-state_fund,highway_safety_improvement_program_(hsip)_(non-infrastructure),highway_safety_improvement_program_(hsip)(infrastructure)-federal_fund,local_partnership_program_(lpp_–_competitive)_,local_roads,local_roads_rehabilitation,railroad_grade_crossing_protection,railroad_grade_separations,"rebuilding_american_infrastructure_with_sustainability_and_equity_(raise)_and_multimodal_project_discretionary_grant_programs_(e.g.,_infra,_mega,_rstg_or_rural)_",regional_improvement_program_–_regional_share_of_stip_transportation_enhancement_(off_system),regional_surface_transportation_block_grant_program_(rstbgp)_and_highway_infrastructure_program_(hip),regional_transportation_planning_agency_(rtpa)_stp_match_exchange,sb1_funded_freeway_service_patrol,shopp-_traffic_light_synchronization_program_(tlsp)-_proposition_1b_bond_funds,safe_routes_to_school_(sr2s
_and_srts),set-aside_coordinated_border_infrastructure_(cbi)_program_under_fast_act,solutions_for_congested_corridors_program_(sccp),special_programs,state-local_partnership_program_(slpp)_and_local_partnership_program_(lpp-formulaic),structures_seismic_retrofit_,trade_corridor_enhancement_account_(tcea)_programs_–_local_share,trade_corridor_enhancement_account_(tcea)_programs_–_state_share,trade_corridors_improvement_fund_(tcif)_program_local_streets_&_roads,traffic_congestion_relief_program_(_tcrp_)
1413,5918(101),5918,"4-26-2023: told Neal Hay to do a BAR request and that he cannot ask for more than what was lapsed - JC 1/10/22: TCT JWalton adv of CWA expring and funds lapsing. need invoice by Apr 1, 2022. ab 8/2/17: email SRiddle re: inactive status. ab County will seek to replace (SR= 53.6)",3,,"On Howsley Road, 1.02 Mile East of State Route 99, Br",Bridge Replacement,,,,,,0-CR,N,,Sutter County,,Sutter County,single phase,No,No,No,No,No,No,No,No,No,Yes,No,No,No,No,No,No,Yes,No,No,No,No,No,No,No,No,No,No,Yes,No,No,No,No,No,No,No,No,No,No,No,No,No


In [86]:
project_df.sample()

Unnamed: 0,project_id,county_code,comment_desc,district_code,est_total_prj_costs,location_name,project_label_name,project_planning_id,original_post_mile_begin_id,original_post_mile_end_id,revised_post_mile_begin_ind,revised_post_mile_end_ind,route_name,state_hwy_ind,senate_district_code,agency_name,urban_area_code,county_name,current_phase,active_transportation_program_(atp),bridge_inspection_&_scour_evaluation,covid_relief_funds_for_highway_infrastructure_programs_for_stip-covid_augmentation,carbon_reduction_program_(crp),congestion_mitigation_&_air_quality_improvement_program_(cmaq),coronavirus_response_and_relief_supplemental_appropriations_act_(crrsaa)_funds,corridor_mobility_improvement_account_(cmia)_program,county_exchange_funds,county_state_match_program,"earmarks_projects_(hpp,_demo_cpfcds,_etc.)",emergency_relief_(er),ferry_boat_program_(fbp)_and_ferry_boat_discretionary_(fbd)_program,"funds_for_planning,_programming_and_monitoring_-_rip",general_funded_designated_programs,hazard_elimination_safety_(hes),high_risk_rural_roads_program_(hr3),highway_bridge_,highway_safety_improvement_program_(hsip)_(infrastructure)-state_fund,highway_safety_improvement_program_(hsip)_(non-infrastructure),highway_safety_improvement_program_(hsip)(infrastructure)-federal_fund,local_partnership_program_(lpp_–_competitive)_,local_roads,local_roads_rehabilitation,railroad_grade_crossing_protection,railroad_grade_separations,"rebuilding_american_infrastructure_with_sustainability_and_equity_(raise)_and_multimodal_project_discretionary_grant_programs_(e.g.,_infra,_mega,_rstg_or_rural)_",regional_improvement_program_–_regional_share_of_stip_transportation_enhancement_(off_system),regional_surface_transportation_block_grant_program_(rstbgp)_and_highway_infrastructure_program_(hip),regional_transportation_planning_agency_(rtpa)_stp_match_exchange,sb1_funded_freeway_service_patrol,shopp-_traffic_light_synchronization_program_(tlsp)-_proposition_1b_bond_funds,safe_routes_to_school_(sr2s
_and_srts),set-aside_coordinated_border_infrastructure_(cbi)_program_under_fast_act,solutions_for_congested_corridors_program_(sccp),special_programs,state-local_partnership_program_(slpp)_and_local_partnership_program_(lpp-formulaic),structures_seismic_retrofit_,trade_corridor_enhancement_account_(tcea)_programs_–_local_share,trade_corridor_enhancement_account_(tcea)_programs_–_state_share,trade_corridors_improvement_fund_(tcif)_program_local_streets_&_roads,traffic_congestion_relief_program_(_tcrp_)
8860,5150(015),5953,"Data Migrated from CTIPS :\r\nThe Project Planning Id are: 5328; \r\nThe locations are :Install Rectangular Rapid Flashing Beacons (RRFB) with LED lights, updated signing, and pavement markings at uncontrolled crosswalks.;",7,,"Install Rectangular Rapid Flashing Beacons (RRFB) with LED lights, updated signing, and pavement markings at uncontrolled crosswalks.",Phase 2 of completion pedestrian safety at the remaining uncontrolled crosswalk location in the cit,5328,,,,,0-HNTP,N,,Huntington Park,3041,Los Angeles County,single phase,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown


## Phase_Funding Table

### Bring in fund_code

In [87]:
def load_fund_codes(excel_file:str)->pd.DataFrame:
    """
    Load the fund code lookup table from an Excel workbook.

    Args:
        excel_file: path of the workbook to read. It must contain the
            columns '0001' (fund code) and 'General Fund' (fund name).

    Returns:
        DataFrame whose '0001' column holds the fund code formatted to a
        width of four with zero padding, and whose 'General Fund' column is
        renamed to 'general_fund'.

    NOTE(review): the column labels '0001' / 'General Fund' look like the
    first data row of a sheet without a header row; if so, that fund is
    missing from the returned rows — confirm against the workbook.
    """
    # Read the workbook the caller asked for (previously the path was
    # hard-coded and the argument silently ignored).
    df = pd.read_excel(excel_file)
    
    # Pad codes
    df['0001'] = df['0001'].apply(lambda x: f'{x:04}')
    df = df.rename(columns = {'General Fund':'general_fund'})
    
    return df

In [88]:
fund_codes = load_fund_codes("lp2000_2023_fund_codes.xlsx")

In [89]:
# Attach the fund name to every accounting row; the padded code column from
# the lookup is only the join key and is dropped straight away.
fund_phase_df = (
    accounting_df
    .merge(fund_codes, how='left', left_on='fund_code', right_on='0001')
    .drop(columns=['0001'])
)

In [90]:
fund_phase_df.general_fund = fund_phase_df.general_fund.fillna('No Fund Info')

In [91]:
fund_phase_df.general_fund.value_counts()

Federal Trust Fund                                   7918
State Highway Account                                1580
Road Maintenance & Rehabilitation Account, STF        437
No Fund Info                                          394
Local Bridge Seismic Retrofit Acct                     76
Environmental Enhanc & Mitigat Prgm Fd                 56
Transportation Investment Fund                         30
Transportation Deferred Investment Fund                20
Trade Corridor Enhancement Account, STF                18
Trade Corridors Improvement Fund                       13
Traffic Congestion Relief Fund                          8
State-Local Partnership Program Acct                    7
Highway Safety,Rehabilitation,& Preservation Acct       7
Transportation Faciilities Account                      1
Corridor Mobility Improvement Account                   1
Name: general_fund, dtype: int64

In [92]:
# Sum up everything 
# Budget and expenditure totals per project and fund.
fund_phase_df_pivot1 = (
    fund_phase_df
    .groupby(['project_id', 'general_fund'])
    .agg(
        single_phase_cost=('curr_bud_am', 'sum'),
        single_phase_expenditure=('cash_exp_am', 'sum'),
    )
    .reset_index()
)

In [93]:
fund_phase_df_pivot1.head()

Unnamed: 0,project_id,general_fund,single_phase_cost,single_phase_expenditure
0,0001(002),Federal Trust Fund,20743617.04,20743617.04
1,0014(005),Federal Trust Fund,879983.23,879983.23
2,0027(012),Federal Trust Fund,12830458.87,12830458.87
3,0061(025),Federal Trust Fund,2595722.0,2595722.0
4,15A5(001),Federal Trust Fund,849820.3,700737.01


In [94]:
# No fund detail -> just want the total cost 
# Budget and expenditure totals per project across all funds.
fund_phase_df_pivot2 = (
    fund_phase_df
    .groupby(['project_id'])
    .agg(
        single_phase_cost=('curr_bud_am', 'sum'),
        single_phase_expenditure_amt=('cash_exp_am', 'sum'),
    )
    .reset_index()
)

In [95]:
fund_phase_df_pivot2.head()

Unnamed: 0,project_id,single_phase_cost,single_phase_expenditure_amt
0,0001(002),20743617.04,20743617.04
1,0014(005),879983.23,879983.23
2,0027(012),12830458.87,12830458.87
3,0061(025),2595722.0,2595722.0
4,15A5(001),849820.3,700737.01


In [96]:
# Pivot so general_fund will be the column names
# https://stackoverflow.com/questions/22798934/pandas-long-to-wide-reshape-by-two-variables
# Long to wide: one row per project_id, one column per general_fund value,
# each cell holding that fund's single_phase_cost. Only single_phase_cost is
# carried over; the expenditure column summed earlier is not part of this pivot.
fund_phase_df_pivot1 = fund_phase_df_pivot1.pivot_table(index=['project_id'], columns='general_fund', 
                    values=['single_phase_cost'], aggfunc='sum')

In [97]:
fund_phase_df_pivot1.columns = fund_phase_df_pivot1.columns.droplevel()

In [98]:
fund_phase_df_pivot1 = fund_phase_df_pivot1.reset_index()

In [99]:
fund_phase_df_pivot1 = to_snakecase(fund_phase_df_pivot1)

In [100]:
# Collect the fund columns by name: every column whose name contains
# "account", "fd", "acct" or "fund".
# This also picks up 'no_fund_info' and 'federal_trust_fund', which the next
# two cells remove so that only state funds remain.
# NOTE(review): not safe to re-run after total_state_funds has been added —
# that name also matches "fund" and would then be summed into itself.
state_only_columns = list((fund_phase_df_pivot1.filter(regex='account|fd|acct|fund').columns))

In [101]:
state_only_columns.remove('no_fund_info')

In [102]:
state_only_columns.remove('federal_trust_fund')

In [103]:
fund_phase_df_pivot1['total_state_funds'] = fund_phase_df_pivot1[state_only_columns].sum(axis = 1).fillna(0)

In [104]:
# Mask integers with bool
# First half of the Yes/No version of the pivot: every missing amount (NaN)
# becomes the string 'No'. The numeric pivot itself is left unchanged.
fund_phase_df_pivot_bool = fund_phase_df_pivot1.fillna('No')

In [105]:
# Second half: every value that parses as a number becomes the string 'Yes'.
# NOTE(review): the mask covers all columns, so total_state_funds (always
# numeric because it was filled with 0) turns into 'Yes' on every row, and a
# project_id that parses as a number would be overwritten too.
# NOTE(review): fund_phase_df_pivot_bool is not used again in the cells shown
# here — confirm whether it is still needed.
fund_phase_df_pivot_bool = fund_phase_df_pivot_bool.mask(fund_phase_df_pivot_bool.apply(lambda x : pd.to_numeric(x,errors='coerce')).notnull(),'Yes')

In [106]:
# Combine the per-fund amounts with the per-project totals so each project
# carries its total budgeted amount for the single phase and its expenditure
final_fund_phase_df = fund_phase_df_pivot1.merge(fund_phase_df_pivot2, on='project_id')

In [107]:
final_fund_phase_df['total_federal_funds'] = final_fund_phase_df.federal_trust_fund

In [108]:
final_fund_phase_df.shape

(4407, 20)

In [109]:
final_fund_phase_df.project_id.nunique()

4407

In [110]:
def is_state_funds(row):
    """Return "Yes" when the row's total_state_funds is greater than zero, otherwise "No"."""
    return "Yes" if row.total_state_funds > 0 else "No"

In [111]:
def is_fed_funds(row):
    """Return "Yes" if the row's total_federal_funds is greater than zero, else "No".

    A missing (NaN) total compares as not greater than zero and so yields "No".
    """
    return "Yes" if row.total_federal_funds > 0 else "No"

In [112]:
# Flag each project row "Yes"/"No" depending on whether it has any state funds.
final_fund_phase_df["is_state"] = final_fund_phase_df.apply(
    is_state_funds,
    axis="columns",
)

In [113]:
# Flag each project row "Yes"/"No" depending on whether it has any federal funds.
final_fund_phase_df["is_federal"] = final_fund_phase_df.apply(
    is_fed_funds,
    axis="columns",
)

In [114]:
# Replace every remaining missing value with 0 (funds a project did not
# receive are NaN after the pivot). Runs after the Yes/No flags are computed.
final_fund_phase_df = final_fund_phase_df.fillna(0)

In [115]:
# Show one random project that has an amount recorded under no_fund_info.
final_fund_phase_df[final_fund_phase_df["no_fund_info"] != 0].sample()

Unnamed: 0,project_id,corridor_mobility_improvement_account,environmental_enhanc_&_mitigat_prgm_fd,federal_trust_fund,"highway_safety,rehabilitation,&_preservation_acct",local_bridge_seismic_retrofit_acct,no_fund_info,"road_maintenance_&_rehabilitation_account,_stf",state_highway_account,state-local_partnership_program_acct,"trade_corridor_enhancement_account,_stf",trade_corridors_improvement_fund,traffic_congestion_relief_fund,transportation_deferred_investment_fund,transportation_faciilities_account,transportation_investment_fund,total_state_funds,single_phase_cost,single_phase_expenditure_amt,total_federal_funds,is_state,is_federal
3593,5961(018),0.0,0.0,0.0,0.0,0.0,3023637.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3023637.0,0.0,0.0,No,No


In [116]:
# Count distinct projects for each combination of the state / federal flags.
final_fund_phase_df.groupby(['is_state', 'is_federal'])[["project_id"]].nunique()

Unnamed: 0_level_0,Unnamed: 1_level_0,project_id
is_state,is_federal,Unnamed: 2_level_1
No,No,356
No,Yes,2657
Yes,No,1253
Yes,Yes,141


### Double Checking
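* Spot-check individual projects to make sure the `is_state` / `is_federal` flags and the fund totals agree with the underlying `fund_phase_df` rows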

In [117]:
# Projects with the most rows in fund_phase_df: candidates for the spot checks below.
fund_phase_df.project_id.value_counts().head()

6211(130)    32
5908(031)    28
6211(131)    27
6053(130)    25
5006(219)    23
Name: project_id, dtype: int64

In [118]:
# Show the final row for one project, colouring red every cell whose text
# contains 'Yes'.
# Styler.where was deprecated in pandas 1.3 and removed in 2.0; Styler.applymap
# with an explicit "no style" branch is its documented replacement.
(
    final_fund_phase_df
    .loc[final_fund_phase_df.project_id == '5944(068)']
    .style
    .applymap(lambda val: 'color: red' if 'Yes' in str(val) else '')
)

Unnamed: 0,project_id,corridor_mobility_improvement_account,environmental_enhanc_&_mitigat_prgm_fd,federal_trust_fund,"highway_safety,rehabilitation,&_preservation_acct",local_bridge_seismic_retrofit_acct,no_fund_info,"road_maintenance_&_rehabilitation_account,_stf",state_highway_account,state-local_partnership_program_acct,"trade_corridor_enhancement_account,_stf",trade_corridors_improvement_fund,traffic_congestion_relief_fund,transportation_deferred_investment_fund,transportation_faciilities_account,transportation_investment_fund,total_state_funds,single_phase_cost,single_phase_expenditure_amt,total_federal_funds,is_state,is_federal
3296,5944(068),0.0,0.0,5412383.39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,85000.0,85000.0,5497383.39,5497383.39,5412383.39,Yes,Yes


In [119]:
# Raw accounting rows for the same project, to compare against the pivoted row.
fund_phase_df[fund_phase_df["project_id"] == '5944(068)']

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id,general_fund
3907,500000588,890,2030010,203,630485.13,630485.13,300,5944(068),Federal Trust Fund
3908,500000588,3008,2030600,506,85000.0,85000.0,620,5944(068),Transportation Investment Fund
3909,500000588,890,2030010,910,809514.72,809514.72,300,5944(068),Federal Trust Fund
3910,500000588,890,2030010,1213,1001729.0,1001729.0,300,5944(068),Federal Trust Fund
3911,500000588,890,2030010,1516,2970654.54,2970654.54,300,5944(068),Federal Trust Fund


In [120]:
# Budgeted federal amount for the project; should equal federal_trust_fund in the final frame.
fund_phase_df.loc[
    (fund_phase_df.project_id == '5944(068)')
    & (fund_phase_df.general_fund == 'Federal Trust Fund'),
    ['curr_bud_am'],
].sum()

curr_bud_am   5412383.39
dtype: float64

In [121]:
# Budgeted amount across all funds for the project; should equal single_phase_cost.
fund_phase_df.loc[fund_phase_df.project_id == '5944(068)', ['curr_bud_am']].sum()

curr_bud_am   5497383.39
dtype: float64

In [122]:
# State total plus federal amount from the final frame; should match the sum above.
final_fund_phase_df.loc[
    final_fund_phase_df.project_id == '5944(068)', 'total_state_funds'
].add(
    final_fund_phase_df.loc[
        final_fund_phase_df.project_id == '5944(068)', 'federal_trust_fund'
    ]
)

3296   5497383.39
dtype: float64

In [123]:
# Second spot check: final row for project 5006(219).
final_fund_phase_df[final_fund_phase_df["project_id"] == '5006(219)']

Unnamed: 0,project_id,corridor_mobility_improvement_account,environmental_enhanc_&_mitigat_prgm_fd,federal_trust_fund,"highway_safety,rehabilitation,&_preservation_acct",local_bridge_seismic_retrofit_acct,no_fund_info,"road_maintenance_&_rehabilitation_account,_stf",state_highway_account,state-local_partnership_program_acct,"trade_corridor_enhancement_account,_stf",trade_corridors_improvement_fund,traffic_congestion_relief_fund,transportation_deferred_investment_fund,transportation_faciilities_account,transportation_investment_fund,total_state_funds,single_phase_cost,single_phase_expenditure_amt,total_federal_funds,is_state,is_federal
399,5006(219),0.0,0.0,32967253.86,0.0,229400.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,229400.0,33196653.86,32534546.43,32967253.86,Yes,Yes


In [124]:
# Budgeted federal amount for the project; should equal federal_trust_fund in the final frame.
fund_phase_df.loc[
    (fund_phase_df.project_id == '5006(219)')
    & (fund_phase_df.general_fund == 'Federal Trust Fund'),
    ['curr_bud_am'],
].sum()

curr_bud_am   32967253.86
dtype: float64

In [125]:
# Manual check for 5006(219): total_state_funds (229400.00) plus
# federal_trust_fund (32967253.86), both copied from the final row shown
# above, should equal that row's single_phase_cost (33196653.86).
229400.00 + 32967253.86

33196653.86

In [126]:
# Raw accounting rows for project 5006(219).
fund_phase_df[fund_phase_df["project_id"] == '5006(219)']

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id,general_fund
5904,700001158,890,2030010,506,1000000.0,1000000.0,810,5006(219),Federal Trust Fund
5905,700001158,890,2030010,1415,0.0,0.0,300,5006(219),Federal Trust Fund
5906,700001158,890,2030010,809,1691542.0,1691542.0,810,5006(219),Federal Trust Fund
5907,700001158,890,2030010,1011,25448.42,25448.42,810,5006(219),Federal Trust Fund
5908,700001158,890,2030010,1112,20206009.54,20206009.54,300,5006(219),Federal Trust Fund
5909,700001158,890,2030010,1314,3216979.12,3216979.12,300,5006(219),Federal Trust Fund
5910,700001158,890,2030010,1415,154672.27,154672.27,300,5006(219),Federal Trust Fund
5911,700001158,890,2030010,1617,608787.0,294068.82,300,5006(219),Federal Trust Fund
5912,700001158,890,2030010,1920,1876299.0,1653630.55,300,5006(219),Federal Trust Fund
5913,700001158,890,2030010,1819,106000.0,94331.53,300,5006(219),Federal Trust Fund


## Awards Table

In [127]:
# Look at one random row of pect_df (no random_state is set, so the row differs between runs).
pect_df.sample()

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id,pec,pect,pect_description,program
10321,1214000100,890,2030010,1920,357.0,357.0,810,6212(024),2030010,810.0,Regional Surface Transportation Block Grant Program (RSTBGP) and Highway Infrastructure Program (HIP),Local Assistance


In [128]:
# Look at one random row of accounting_df (no random_state is set, so the row differs between runs).
accounting_df.sample()

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id
4419,518000016,890,2030010,2021,359344.0,11339.39,650,32L0(337)


In [129]:
# One row per (project_id, program) carrying the largest
# appropriation_category_code seen for that pair, i.e. each program is
# listed once per project with its most recent year.
# NOTE(review): `max` compares the codes as stored (values such as 506,
# 1819, 2223 appear in the data); this picks the latest fiscal year only if
# the codes sort chronologically. Confirm if pre-2000 codes can occur.
awards_df = (
    pect_df
    .groupby(['project_id', 'program'])
    .agg(state_fiscal_awarded_year=('appropriation_category_code', 'max'))
    .reset_index()
    .rename(columns={'program': 'grant_program'})
)

## Checks

In [130]:
# Projects that appear under the most grant programs: candidates for the checks below.
awards_df.project_id.value_counts().head()

6066(140)    3
5956(221)    3
5938(233)    3
5475(038)    3
6090(059)    3
Name: project_id, dtype: int64

In [131]:
# Award rows for one multi-program project.
awards_df[awards_df["project_id"] == "5288(046)"]

Unnamed: 0,project_id,grant_program,state_fiscal_awarded_year
1950,5288(046),Local Assistance,2223
1951,5288(046),"Proposition 1B, Hwy Safety, Traffic Reduction, Air Quality , and Port Security Bond Act of 2006, and SB 1: The Road Repair and Accountability Act of 2017",1718
1952,5288(046),State Transportation Improvement Program (STIP),1819


In [132]:
# Compare with the source rows in pect_df for the same project.
pect_df[pect_df["project_id"] == "5288(046)"]

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id,pec,pect,pect_description,program
2189,320000120,890,2030600,1819,1985000.0,1673403.81,620,5288(046),2030600,620.0,Local Roads,State Transportation Improvement Program (STIP)
2190,320000120,3290,2030210,1718,10000000.0,8419665.27,210,5288(046),2030210,210.0,Local Partnership Program (LPP – Competitive),"Proposition 1B, Hwy Safety, Traffic Reduction, Air Quality , and Port Security Bond Act of 2006, and SB 1: The Road Repair and Accountability Act of 2017"
2191,320000120,890,2030600,1819,13015000.0,10872625.7,620,5288(046),2030600,620.0,Local Roads,State Transportation Improvement Program (STIP)
2192,320000120,890,2030010,2223,750000.0,0.0,810,5288(046),2030010,810.0,Regional Surface Transportation Block Grant Program (RSTBGP) and Highway Infrastructure Program (HIP),Local Assistance


In [133]:
# Show the project row, colouring red every cell whose text contains 'Yes'.
# Styler.where was deprecated in pandas 1.3 and removed in 2.0; Styler.applymap
# with an explicit "no style" branch is its documented replacement.
(
    project_df[project_df.project_id == "5288(046)"]
    .style
    .applymap(lambda val: 'color: red' if 'Yes' in str(val) else '')
)

Unnamed: 0,project_id,county_code,comment_desc,district_code,est_total_prj_costs,location_name,project_label_name,project_planning_id,original_post_mile_begin_id,original_post_mile_end_id,revised_post_mile_begin_ind,revised_post_mile_end_ind,route_name,state_hwy_ind,senate_district_code,agency_name,urban_area_code,county_name,current_phase,active_transportation_program_(atp),bridge_inspection_&_scour_evaluation,covid_relief_funds_for_highway_infrastructure_programs_for_stip-covid_augmentation,carbon_reduction_program_(crp),congestion_mitigation_&_air_quality_improvement_program_(cmaq),coronavirus_response_and_relief_supplemental_appropriations_act_(crrsaa)_funds,corridor_mobility_improvement_account_(cmia)_program,county_exchange_funds,county_state_match_program,"earmarks_projects_(hpp,_demo_cpfcds,_etc.)",emergency_relief_(er),ferry_boat_program_(fbp)_and_ferry_boat_discretionary_(fbd)_program,"funds_for_planning,_programming_and_monitoring_-_rip",general_funded_designated_programs,hazard_elimination_safety_(hes),high_risk_rural_roads_program_(hr3),highway_bridge_,highway_safety_improvement_program_(hsip)_(infrastructure)-state_fund,highway_safety_improvement_program_(hsip)_(non-infrastructure),highway_safety_improvement_program_(hsip)(infrastructure)-federal_fund,local_partnership_program_(lpp_–_competitive)_,local_roads,local_roads_rehabilitation,railroad_grade_crossing_protection,railroad_grade_separations,"rebuilding_american_infrastructure_with_sustainability_and_equity_(raise)_and_multimodal_project_discretionary_grant_programs_(e.g.,_infra,_mega,_rstg_or_rural)_",regional_improvement_program_–_regional_share_of_stip_transportation_enhancement_(off_system),regional_surface_transportation_block_grant_program_(rstbgp)_and_highway_infrastructure_program_(hip),regional_transportation_planning_agency_(rtpa)_stp_match_exchange,sb1_funded_freeway_service_patrol,shopp-_traffic_light_synchronization_program_(tlsp)-_proposition_1b_bond_funds,safe_routes_to_school_(sr2s
_and_srts),set-aside_coordinated_border_infrastructure_(cbi)_program_under_fast_act,solutions_for_congested_corridors_program_(sccp),special_programs,state-local_partnership_program_(slpp)_and_local_partnership_program_(lpp-formulaic),structures_seismic_retrofit_,trade_corridor_enhancement_account_(tcea)_programs_–_local_share,trade_corridor_enhancement_account_(tcea)_programs_–_state_share,trade_corridors_improvement_fund_(tcif)_program_local_streets_&_roads,traffic_congestion_relief_program_(_tcrp_)
7834,5288(046),5924,"Data Migrated from CTIPS : The Project Planning Id are: 1785; The locations are :In Folsom on White Rock Road in the vicinity of the Scott Road Intersection. Widen 1 mile of 4-lane roadway and signalize 1 Intersection.; 2/13/2020: This project is the same as STPL-6498(003). Agency is determining whether CMGC negotiations will be viable via the JPA and if not, project will be turned over to City of Folsom to implement/construct. CR 2/13/2020: This project is the same as Project has $10,000 LPP and $15,000 RIP/STIP. 8/22/22: Cost adj to correct local funds to local AC $6,201,500. Erroneously entered as local funds in prior sequence.",3,25750000.0,"In City of Folsom, on White Rock Road from Prairie City Road to East Bidwell Street.",Construct 4 lane road with 8 foot shoulders,1785,,,,,0-FOL,N,,Folsom,3067,Sacramento County,single phase,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,Yes,No,No,No,No,No,Yes,No,No,No,No,No,No,No,No,No,No,No,No,No


In [134]:
# Award rows for a second multi-program project.
awards_df[awards_df["project_id"] == "5475(038)"]

Unnamed: 0,project_id,grant_program,state_fiscal_awarded_year
2475,5475(038),Active Transportation Program (ATP),2223
2476,5475(038),Local Assistance,2223
2477,5475(038),"Proposition 1B, Hwy Safety, Traffic Reduction, Air Quality , and Port Security Bond Act of 2006, and SB 1: The Road Repair and Accountability Act of 2017",2122


In [135]:
# Compare with the source rows in pect_df for the same project.
pect_df[pect_df["project_id"] == "5475(038)"]

Unnamed: 0,adv_project_id,fund_code,pec_code,appropriation_category_code,curr_bud_am,cash_exp_am,pect_task_code,project_id,pec,pect,pect_description,program
1708,315000005,890,2030720,2223,1512000.0,0.0,100,5475(038),2030720,100.0,Active Transportation Program (ATP),Active Transportation Program (ATP)
1709,315000005,42,2030210,2122,2860000.0,0.0,350,5475(038),2030210,350.0,Solutions for Congested Corridors Program (SCCP),"Proposition 1B, Hwy Safety, Traffic Reduction, Air Quality , and Port Security Bond Act of 2006, and SB 1: The Road Repair and Accountability Act of 2017"
1710,315000005,890,2030010,1314,1061999.97,1061999.97,820,5475(038),2030010,820.0,Congestion Mitigation & Air Quality Improvement Program (CMAQ),Local Assistance
1711,315000005,890,2030010,1516,2898000.0,2898000.0,820,5475(038),2030010,820.0,Congestion Mitigation & Air Quality Improvement Program (CMAQ),Local Assistance
1712,315000005,890,2030010,2223,9552155.0,0.0,810,5475(038),2030010,810.0,Regional Surface Transportation Block Grant Program (RSTBGP) and Highway Infrastructure Program (HIP),Local Assistance


In [136]:
# Show the project row, colouring red every cell whose text contains 'Yes'.
# Styler.where was deprecated in pandas 1.3 and removed in 2.0; Styler.applymap
# with an explicit "no style" branch is its documented replacement.
(
    project_df[project_df.project_id == "5475(038)"]
    .style
    .applymap(lambda val: 'color: red' if 'Yes' in str(val) else '')
)

Unnamed: 0,project_id,county_code,comment_desc,district_code,est_total_prj_costs,location_name,project_label_name,project_planning_id,original_post_mile_begin_id,original_post_mile_end_id,revised_post_mile_begin_ind,revised_post_mile_end_ind,route_name,state_hwy_ind,senate_district_code,agency_name,urban_area_code,county_name,current_phase,active_transportation_program_(atp),bridge_inspection_&_scour_evaluation,covid_relief_funds_for_highway_infrastructure_programs_for_stip-covid_augmentation,carbon_reduction_program_(crp),congestion_mitigation_&_air_quality_improvement_program_(cmaq),coronavirus_response_and_relief_supplemental_appropriations_act_(crrsaa)_funds,corridor_mobility_improvement_account_(cmia)_program,county_exchange_funds,county_state_match_program,"earmarks_projects_(hpp,_demo_cpfcds,_etc.)",emergency_relief_(er),ferry_boat_program_(fbp)_and_ferry_boat_discretionary_(fbd)_program,"funds_for_planning,_programming_and_monitoring_-_rip",general_funded_designated_programs,hazard_elimination_safety_(hes),high_risk_rural_roads_program_(hr3),highway_bridge_,highway_safety_improvement_program_(hsip)_(infrastructure)-state_fund,highway_safety_improvement_program_(hsip)_(non-infrastructure),highway_safety_improvement_program_(hsip)(infrastructure)-federal_fund,local_partnership_program_(lpp_–_competitive)_,local_roads,local_roads_rehabilitation,railroad_grade_crossing_protection,railroad_grade_separations,"rebuilding_american_infrastructure_with_sustainability_and_equity_(raise)_and_multimodal_project_discretionary_grant_programs_(e.g.,_infra,_mega,_rstg_or_rural)_",regional_improvement_program_–_regional_share_of_stip_transportation_enhancement_(off_system),regional_surface_transportation_block_grant_program_(rstbgp)_and_highway_infrastructure_program_(hip),regional_transportation_planning_agency_(rtpa)_stp_match_exchange,sb1_funded_freeway_service_patrol,shopp-_traffic_light_synchronization_program_(tlsp)-_proposition_1b_bond_funds,safe_routes_to_school_(sr2s
_and_srts),set-aside_coordinated_border_infrastructure_(cbi)_program_under_fast_act,solutions_for_congested_corridors_program_(sccp),special_programs,state-local_partnership_program_(slpp)_and_local_partnership_program_(lpp-formulaic),structures_seismic_retrofit_,trade_corridor_enhancement_account_(tcea)_programs_–_local_share,trade_corridor_enhancement_account_(tcea)_programs_–_state_share,trade_corridors_improvement_fund_(tcif)_program_local_streets_&_roads,traffic_congestion_relief_program_(_tcrp_)
2664,5475(038),5924,"10/1/2020: Original AED date was 9/30/2020, new sequence being done to extend date to 9/30/2022. There will be a gap of time that is not reimbursable. CR CMAQ Emissions Benefit: .03 ROG, .02 NOx, .01 PM10 Project has EPSP approval for $2,646,524 of CMAQ for R/W to 15/16 FY. And EPSP for $291,476 of CMAQ for PE to 15/16 FY.",3,36291000.0,"Auburn Blvd. Complete Streets - Phase 2. On Auburn Blvd, in Citrus Heights from Rusch Park to Northern City Limits.",Pedestrian and Bike Path,1804A,,,,,0-CHts,N,,Citrus Heights,3067,Sacramento County,single phase,Yes,No,No,No,Yes,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,No,No,No,No,No,Yes,No,No,No,No,No,No,No


## Save to Excel

In [137]:
# Writing to .xlsx can fail with IllegalCharacterError when a string cell
# contains ASCII control characters:
# https://stackoverflow.com/questions/28837057/pandas-writing-an-excel-file-containing-unicode-illegalcharactererror
# Strip only those characters (0x00-0x08, 0x0B-0x0C, 0x0E-0x1F; tab, newline
# and carriage return are allowed). The previous unicode_escape round trip
# also rewrote every non-ASCII character and newline as a literal backslash
# escape (e.g. an en dash became the text "\u2013") and escaped the text again
# each time the cell was re-run. Non-string cells are left untouched.
project_df = project_df.applymap(
    lambda x: re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', x) if isinstance(x, str) else x
)

In [138]:
# Eyeball three random rows after the string clean-up (no random_state is set, so rows differ between runs).
project_df.sample(3)

Unnamed: 0,project_id,county_code,comment_desc,district_code,est_total_prj_costs,location_name,project_label_name,project_planning_id,original_post_mile_begin_id,original_post_mile_end_id,revised_post_mile_begin_ind,revised_post_mile_end_ind,route_name,state_hwy_ind,senate_district_code,agency_name,urban_area_code,county_name,current_phase,active_transportation_program_(atp),bridge_inspection_&_scour_evaluation,covid_relief_funds_for_highway_infrastructure_programs_for_stip-covid_augmentation,carbon_reduction_program_(crp),congestion_mitigation_&_air_quality_improvement_program_(cmaq),coronavirus_response_and_relief_supplemental_appropriations_act_(crrsaa)_funds,corridor_mobility_improvement_account_(cmia)_program,county_exchange_funds,county_state_match_program,"earmarks_projects_(hpp,_demo_cpfcds,_etc.)",emergency_relief_(er),ferry_boat_program_(fbp)_and_ferry_boat_discretionary_(fbd)_program,"funds_for_planning,_programming_and_monitoring_-_rip",general_funded_designated_programs,hazard_elimination_safety_(hes),high_risk_rural_roads_program_(hr3),highway_bridge_,highway_safety_improvement_program_(hsip)_(infrastructure)-state_fund,highway_safety_improvement_program_(hsip)_(non-infrastructure),highway_safety_improvement_program_(hsip)(infrastructure)-federal_fund,local_partnership_program_(lpp_–_competitive)_,local_roads,local_roads_rehabilitation,railroad_grade_crossing_protection,railroad_grade_separations,"rebuilding_american_infrastructure_with_sustainability_and_equity_(raise)_and_multimodal_project_discretionary_grant_programs_(e.g.,_infra,_mega,_rstg_or_rural)_",regional_improvement_program_–_regional_share_of_stip_transportation_enhancement_(off_system),regional_surface_transportation_block_grant_program_(rstbgp)_and_highway_infrastructure_program_(hip),regional_transportation_planning_agency_(rtpa)_stp_match_exchange,sb1_funded_freeway_service_patrol,shopp-_traffic_light_synchronization_program_(tlsp)-_proposition_1b_bond_funds,safe_routes_to_school_(sr2s
_and_srts),set-aside_coordinated_border_infrastructure_(cbi)_program_under_fast_act,solutions_for_congested_corridors_program_(sccp),special_programs,state-local_partnership_program_(slpp)_and_local_partnership_program_(lpp-formulaic),structures_seismic_retrofit_,trade_corridor_enhancement_account_(tcea)_programs_–_local_share,trade_corridor_enhancement_account_(tcea)_programs_–_state_share,trade_corridors_improvement_fund_(tcif)_program_local_streets_&_roads,traffic_congestion_relief_program_(_tcrp_)
4782,5252(008),5942,Road Reconstruction,6,,Proj# ready for use.,,,,,,,,,,Parlier,3198.0,Fresno County,single phase,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
2631,5932(095),5932,Bridge Replacement,10,,"Old Wards Ferry Road over Curtis Creek, Bridge 32C0017.",Bridge Replacement - single-lane bridge with a wider single-lane bridge,,,,,,0-CR,N,,Tuolumne County,,Tuolumne County,single phase,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No
2324,5089(028),5919,Asphalt Concrete Overlay,3,,Joiner Parkway Repaving Project,Asphalt Concrete Overlay,,,,,,0-LNCN,N,,Lincoln,3135.0,Placer County,single phase,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,No,No,No,No,No,No,No,No,No,No,No,No,No


In [140]:

# Write the three result tables to a single workbook, one sheet per table,
# without the DataFrame index. The context manager saves and closes the file
# on exit. (The file name is a plain string: the original f-string had no
# placeholders.)
with pd.ExcelWriter("LP2000.xlsx") as writer:
    awards_df.to_excel(writer, sheet_name="awards", index=False)
    final_fund_phase_df.to_excel(writer, sheet_name="phase_funding", index=False)
    project_df.to_excel(writer, sheet_name="project", index=False)