## Requested updates submitted January 2025
* Using the additional lists provided, can you please update your script to include additional program codes.
* Updates to program descriptions as highlighted in column C.
* Adding the funding type from column F in the script output.
* Use the “RK Locode” column K in the Project list as the Primary Locode, and if blank, use your current data source to populate the implementing agency.

In [1]:
import _data_utils
import _script_utils
import numpy as np
import pandas as pd
from calitp_data_analysis.sql import to_snakecase

In [2]:
# Notebook display settings: show up to 100 columns, two-decimal floats,
# and do not truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
# Base GCS prefix that every workbook path read in this notebook is built from.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/dla/dla-iija"

In [4]:
# File name of the project list workbook, relative to GCS_FILE_PATH.
project_list = "IIJA Project List 01_2025.xlsx"

In [5]:
# Read the project list workbook from GCS; to_snakecase normalizes the column
# labels (e.g. `program_code`, `rk_locode` in the preview below).
project_df = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/{project_list}"))

In [6]:
project_df.head(2)

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,pid_district,project_number,recipient_project_number,pid_check1,efis_id,pid_check2,project_title,rk_locode,county_code,congressional_district,project_status_description,project_description,improvement_type,improvement_type_description,total_cost_amount,obligations_amount,summary_recipient_defined_text_field_1_value,comp
0,2022-01-20,ER01,EMERGENCY REL 2022 SUPPLEMENT,5.0,31RA002,0518000118S,11,518000118,10,MONTEREY COUNTY NEAR BIG SUR 2.3 MILES NORTH OF CASTRO CANYON BRIDGE TO 0.8 MILE SOUTH OF BIG SUR RIVER BRIDGE. EMERGENCY PROJECT - PERMANENT RESTORA,,53,Cong Dist 20,Active,MONTEREY COUNTY NEAR BIG SUR 2.3 MILES NORTH OF CASTRO CANYON BRIDGE TO 0.8 MILE SOUTH OF BIG SUR RIVER BRIDGE. EMERGENCY PROJECT - PERMANENT RESTORATION. COMPLETE COASTAL DEVELOPMENT PERMIT REQUIREMENTS AT PFEIFFER CANYON BRIDGE.,16,Right of Way,600000.0,531100.0,S AMBAG,IIJA-A
1,2022-01-20,ER01,EMERGENCY REL 2022 SUPPLEMENT,5.0,31RA002,0518000118S,11,518000118,10,MONTEREY COUNTY NEAR BIG SUR 2.3 MILES NORTH OF CASTRO CANYON BRIDGE TO 0.8 MILE SOUTH OF BIG SUR RIVER BRIDGE. EMERGENCY PROJECT - PERMANENT RESTORA,,53,Cong Dist 20,Active,MONTEREY COUNTY NEAR BIG SUR 2.3 MILES NORTH OF CASTRO CANYON BRIDGE TO 0.8 MILE SOUTH OF BIG SUR RIVER BRIDGE. EMERGENCY PROJECT - PERMANENT RESTORATION. COMPLETE COASTAL DEVELOPMENT PERMIT REQUIREMENTS AT PFEIFFER CANYON BRIDGE.,43,Utilities,2770100.0,2452300.0,S AMBAG,IIJA-A


In [7]:
# Path of the January 2025 program code workbook, relative to GCS_FILE_PATH.
program_codes = "program_codes/Ycodes_01.2025.xlsx"

In [8]:
# Read the full program code workbook (all columns) to inspect what it offers.
program_codes_df = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/{program_codes}"))

In [9]:
program_codes_df.columns

Index(['program_code', 'short_name', 'program_code_description',
       'funding_type', 'funding_type_code', 'iija_code', 'notes_resources'],
      dtype='object')

### Breakout `_data_utils.update_program_code_list2()`

In [10]:
def add_program_to_row(row):
    """Return the row's ``program_name`` with " Program" appended when absent.

    Args:
        row: Mapping-like record with a ``"program_name"`` entry, e.g. a row
            passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        The name unchanged when it already contains "Program" or when it is
        not a string (a missing value); otherwise the name followed by
        " Program".
    """
    name = row["program_name"]

    # Outer merges leave NaN/None in program_name for unmatched codes; the
    # substring test below would raise TypeError on those, so pass them through.
    if not isinstance(name, str):
        return name

    if "Program" in name:
        return name
    return name + " Program"

In [11]:
# Load the baseline code workbook and keep only the code, description and
# program name columns.
original_codes = to_snakecase(
    pd.read_excel(f"{GCS_FILE_PATH}/program_codes/Copy of lst_IIJA_Code_20230908.xlsx")
)
original_codes = original_codes[["iija_program_code", "description", "program_name"]]

In [12]:
original_codes.head()

Unnamed: 0,iija_program_code,description,program_name
0,22MP,Metropolitan Transportation Planning (FY 22),Metropolitan Transportation Planning (FY 22)
1,22SP,Statewide and Nonmetropolitan Transportation Planning (FY 22),Statewide and Nonmetropolitan Transportation Planning (FY 22)
2,73AD,Military Construction (FMIS),Military Construction
3,ER01,Emergency Suppliment Funding,Emergency Supplement Funding
4,N003,CMAQ - Projects to Reduce PM 2.5 Emissions,Congestion Mitigation & Air Quality Improvement


In [13]:
def update_program_code_list(new_codes: pd.DataFrame) -> pd.DataFrame:
    """Outer-merge the baseline code workbook with a newer code list.

    The baseline workbook is re-read from GCS on every call. Rows are matched
    on ``iija_program_code``; the merge indicator counts are displayed so the
    overlap between the two lists can be checked by eye.

    Args:
        new_codes: Frame with at least ``iija_program_code`` and
            ``new_description`` columns.

    Returns:
        The merged frame without ``description`` and ``_merge``.
        ``new_description`` is whitespace-stripped and falls back to the
        baseline ``description`` where the new list has none. If ``new_codes``
        also has a ``program_name`` column, pandas suffixes the pair as
        ``program_name_x`` (baseline) and ``program_name_y`` (new).
    """
    baseline = to_snakecase(
        pd.read_excel(
            f"{GCS_FILE_PATH}/program_codes/Copy of lst_IIJA_Code_20230908.xlsx"
        )
    )
    baseline = baseline[["iija_program_code", "description", "program_name"]]

    merged = baseline.merge(
        new_codes,
        on="iija_program_code",
        how="outer",
        indicator=True,
    )

    # Show how many codes are in both lists, baseline only, or new only.
    display(merged["_merge"].value_counts())

    # Prefer the new description; keep the baseline one where it is missing.
    stripped = merged["new_description"].str.strip()
    merged["new_description"] = stripped.fillna(merged["description"])

    # NOTE: program_name is returned as merged; add_program_to_row is not
    # applied here.
    return merged.drop(columns=["description", "_merge"])

In [14]:
def program_codes_sept_2023() -> pd.DataFrame:
    """Load the expanded FY21-22 program code workbook from GCS.

    Returns:
        A frame with only the ``iija_program_code`` and ``new_description``
        columns.
    """
    workbook = f"{GCS_FILE_PATH}/program_codes/FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx"
    codes = to_snakecase(pd.read_excel(workbook))
    return codes[["iija_program_code", "new_description"]]

In [15]:
# NOTE: this rebinds `program_codes_sept_2023` from the loader function to the
# DataFrame it returns. Re-running this cell without first re-running the
# definition cell fails because a DataFrame is not callable.
program_codes_sept_2023 = program_codes_sept_2023()

In [16]:
program_codes_sept_2023.head(2)

Unnamed: 0,iija_program_code,new_description
0,Y001,National Highway Performance Program (NHPP)
1,Y002,National Highway Performance Program (NHPP)


In [17]:
program_codes_sept_2023.shape

(132, 2)

In [18]:
# Merge the baseline codes with the September 2023 descriptions.
# NOTE(review): the recorded output shows 133 matched rows although the new
# list has 132 rows, which suggests a duplicated iija_program_code in the
# baseline list — confirm.
program_codes1 = update_program_code_list(
    program_codes_sept_2023,
)

both          133
left_only       3
right_only      0
Name: _merge, dtype: int64

In [19]:
program_codes1.head()

Unnamed: 0,iija_program_code,program_name,new_description
0,22MP,Metropolitan Transportation Planning (FY 22),Metropolitan Transportation Planning (FY 22)
1,22SP,Statewide and Nonmetropolitan Transportation Planning (FY 22),Statewide and Nonmetropolitan Transportation Planning (FY 22)
2,73AD,Military Construction,Military Construction (FMIS)
3,ER01,Emergency Supplement Funding,Emergency Suppliment Funding
4,N003,Congestion Mitigation & Air Quality Improvement,CMAQ - Projects to Reduce PM 2.5 Emissions


In [20]:
# Restore the `description` column name after the merge replaced it with the
# coalesced `new_description`.
program_codes1 = program_codes1.rename(columns={"new_description": "description"})

In [21]:
# NOTE: this string is never used. The next cell defines a function with the
# same name (which hard-codes this file name), replacing this binding.
program_codes_jan_2025 = "Ycodes_01.2025.xlsx"

In [22]:
def program_codes_jan_2025() -> pd.DataFrame:
    """Load the January 2025 program code workbook from GCS.

    Returns:
        A frame with columns ``iija_program_code``, ``program_name``,
        ``new_description`` and ``iija_code``. ``program_name`` comes from the
        workbook's ``short_name`` column, converted to title case.
    """
    wanted = ["program_code", "short_name", "program_code_description", "iija_code"]
    # Align the column names with the ones update_program_code_list merges on.
    new_names = {
        "program_code": "iija_program_code",
        "program_code_description": "new_description",
        "short_name": "program_name",
    }

    raw = to_snakecase(
        pd.read_excel(f"{GCS_FILE_PATH}/program_codes/Ycodes_01.2025.xlsx")
    )
    codes = raw[wanted].rename(columns=new_names)
    codes["program_name"] = codes["program_name"].str.title()
    return codes

In [23]:
# NOTE: this rebinds `program_codes_jan_2025` from the loader function to the
# DataFrame it returns. Re-running this cell without first re-running the
# definition cell fails because a DataFrame is not callable.
program_codes_jan_2025 = program_codes_jan_2025()

In [24]:
program_codes_jan_2025.head()

Unnamed: 0,iija_program_code,program_name,new_description,iija_code
0,Y44A,Adv Tech Innv Mobility Deploy,Advanced Transportation Technologies Deployment Program,ATTD
1,Y110,Hip Bridge Formula Program,Bridge Formula Program,BFP
2,Y113,Hip Bridge Formula Program,Bridge Formula Program,BFP
3,Y114,Hip Bridge Formula Program-24,Bridge Formula Program,BFP
4,Y115,Hip Bridge Formula Program-25,Bridge Formula Program,BFP


In [25]:
# Merge the baseline codes with the January 2025 list. Both frames carry a
# `program_name` column, so the result has `program_name_x` (baseline) and
# `program_name_y` (January 2025).
program_codes2 = update_program_code_list(
    program_codes_jan_2025,
)

left_only     86
both          50
right_only    22
Name: _merge, dtype: int64

In [26]:
program_codes2.columns

Index(['iija_program_code', 'program_name_x', 'program_name_y',
       'new_description', 'iija_code'],
      dtype='object')

In [27]:
# Keep the baseline program name and fall back to the January 2025 short name
# for codes that only exist in the new list.
program_codes2["program_name_x"] = program_codes2["program_name_x"].fillna(
    program_codes2["program_name_y"]
)

In [28]:
# The coalesced name becomes `program`; the January 2025 copy is no longer needed.
program_codes2 = program_codes2.drop(columns=["program_name_y"]).rename(
    columns={"program_name_x": "program"}
)

In [29]:
program_codes2.head()

Unnamed: 0,iija_program_code,program,new_description,iija_code
0,22MP,Metropolitan Transportation Planning (FY 22),Metropolitan Transportation Planning (FY 22),
1,22SP,Statewide and Nonmetropolitan Transportation Planning (FY 22),Statewide and Nonmetropolitan Transportation Planning (FY 22),
2,73AD,Military Construction,Military Construction (FMIS),
3,ER01,Emergency Supplement Funding,Emergency Supplement Funding,ER
4,N003,Congestion Mitigation & Air Quality Improvement,CMAQ - Projects to Reduce PM 2.5 Emissions,


### Many program_name values are missing because the program code file that was emailed in January 2025 contains new codes that didn't appear before. Which column in the January 2025 data now represents `program_name`?

In [None]:
# The coalesced program name column was renamed to `program` earlier in the
# notebook, so `program_name` no longer exists on this frame.
program_codes2.loc[program_codes2["program"].isna()]

In [None]:
program_codes2.info()