# Add Agency and Agency Information to FMIS IIJA

In [1]:
import numpy as np
import pandas as pd
from siuba import *

from shared_utils import geography_utils
from dla_utils import _dla_utils

from calitp import to_snakecase



In [2]:
# Notebook display settings: show up to 100 columns and never truncate cell text.
pd.options.display.max_columns = 100
pd.options.display.max_colwidth = None


In [3]:
# Base GCS folder for this notebook's inputs; interpolated into the read_excel
# paths for the county and FMIS project workbooks below.
GCS_FILE_PATH  = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'

## Read In Data

In [4]:
# Agency locode reference list (lives in the e-76Obligated folder, not under GCS_FILE_PATH).
locodes = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/locodes_updated7122021.xlsx"
    )
)

In [5]:
# County-level locode lookup, read from the 'locode1' sheet of the county workbook.
county = to_snakecase(
    pd.read_excel(
        f"{GCS_FILE_PATH}/Copy of County.xlsx",
        sheet_name="locode1",
        header=[0],
    )
)

In [6]:
# FMIS IIJA project export; only the 'IIJA-combined' sheet is read.
# (An earlier attempt used sheet_name='FMIS 5 Projects  ' with header=[3].)
proj = to_snakecase(
    pd.read_excel(
        f"{GCS_FILE_PATH}/CopyofFMIS_Projects_Universe_IIJA_Reporting_4.xls",
        sheet_name="IIJA-combined",
    )
)

In [7]:
# proj2 = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/Copy of FMIS_Projects_Universe_(IIJA_Reporting)_ (5).xls", 
#                            sheet_name='FMIS 5 Projects  ', header=[3]))

In [8]:
# number of entries in the locodes list
len(locodes)

1041

In [9]:
# number of entries in the county locode list
len(county)

1072

In [10]:
county.sample(4)

Unnamed: 0,agency_locode,agency_name,district,mpo,county
204,5205,Rialto,8.0,SCAG,San Bernardino County
285,5287,Wasco,6.0,KCOG,Kern County
941,6394,Sacramento Valley Air Basin,,NON-MPO,
309,5311,Ione,10.0,NON-MPO,Amador County


In [11]:
locodes.head()

Unnamed: 0,agency_locode,agency_name,district,county_name,rtpa_name,mpo_name,mpo_locode_fads,active_e76s______7_12_2021_
0,6302,Humboldt Bay Harbor Recreation & Conservation District,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,
1,6330,Willow Creek Community Services District,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,
2,5036,Trinidad,1,Humboldt County,Humboldt County Association of Governments,NON-MPO,NON-MPO,
3,5049,Ukiah,1,Mendocino County,Mendocino Council of Governments,NON-MPO,NON-MPO,Yes
4,5082,Willits,1,Mendocino County,Mendocino Council of Governments,NON-MPO,NON-MPO,


In [12]:
# Remove the spreadsheet artefact columns and the 'total' column.
# Reassigning (rather than inplace=True) together with errors="ignore" keeps
# this cell safe to re-run: a second run no longer raises KeyError because the
# columns are already gone. `axis=1` was redundant alongside `columns=`.
proj = proj.drop(columns=['unnamed:_0', 'unnamed:_13', 'total'], errors="ignore")

In [13]:
proj.sample()

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value
224,2022-06-16,YS30,HIGHWAY SAFETY IMP PROG IIJA,6205024,0517000039L,ON SR 135 (BROADWAY) IN SANTA MARIA FROM FOSTER RD. TO PREISKER LN. INCREASE VISIBILITY OF TRAFFIC SIGNAL INDICATIONS BY INSTALLING ADDITIONAL TRAFFI,83,Cong Dist 24,21,Safety,795900.0,S6205SBCAG


## Get Locode Substring

In [14]:
# Grab the first row's recipient-defined text field to inspect where the locode sits in it.
string = proj['summary_recipient_defined_text_field_1_value'].iloc[0]

In [15]:
string

'L5253SCAG'

In [16]:
print(string.find('5'))
print(string.find('3'))

1
4


In [17]:
# The locode is the four characters at 0-indexed positions 1-4, i.e. the slice [1:5].

In [18]:
# Characters 1-4 (0-indexed) of the recipient-defined field hold the locode,
# e.g. 'L5253SCAG' -> '5253'. The vectorized .str slice passes missing values
# through as NaN, whereas apply(lambda x: x[1:5]) raises TypeError on a NaN
# (float) entry.
proj['locode'] = proj['summary_recipient_defined_text_field_1_value'].str[1:5]

In [19]:
proj.head()

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value,locode
0,2022-06-01,YS30//YS30,HIGHWAY SAFETY IMP PROG IIJA,5253021,0720000168L,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,37,Cong Dist 43,17//21,Construction Engineering//Safety,1305800.0,L5253SCAG,5253
1,2022-09-15,Y001,NATIONAL HIGHWAY PERF IIJA,NBIL539,0815000215L,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",71,Cong Dist 8,44,Other,301838.32,L5457SCAG,5457
2,2022-07-25,YS30//YS30,HIGHWAY SAFETY IMP PROG IIJA,5132050,0421000374L,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",95,Cong Dist 3,17//21,Construction Engineering//Safety,935150.0,L5132MTC,5132
3,2022-03-07,Y233//Y233//Y233//Y233,STBG IIJA OFF-SYSTEM BRIDGE,5914078,0100020461L,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),33,Cong Dist 3,11//15//16//17,Bridge Replacement - No Added Capacity//Preliminary Engineering//Right of Way//Construction Engineering,1615000.0,L5914NON-MPO,5914
4,2022-07-18,YS30//YS30,HIGHWAY SAFETY IMP PROG IIJA,5094071,0419000558L,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,95,Cong Dist 3,17//21,Construction Engineering//Safety,2027700.0,L5094MTC,5094


In [20]:
county.sample()

Unnamed: 0,agency_locode,agency_name,district,mpo,county
851,6304,Arcade Creek Recreation and Park District,3.0,SACOG,Sacramento County


## Merge data on Locode

In [21]:
# Convert the sliced text to numbers; errors='coerce' turns anything that is not
# numeric into NaN instead of raising (e.g. 'S NON-MPO' slices to ' NON').
# presumably needed so the key dtype matches locodes.agency_locode in the merge — TODO confirm
proj['locode'] = pd.to_numeric(proj['locode'], errors='coerce')

In [55]:
# Left join keeps every project row and attaches agency details where the locode matches.
proj_all = proj.merge(
    locodes,
    how="left",
    left_on="locode",
    right_on="agency_locode",
)

In [56]:
proj_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 330 entries, 0 to 329
Data columns (total 21 columns):
 #   Column                                        Non-Null Count  Dtype         
---  ------                                        --------------  -----         
 0   fmis_transaction_date                         330 non-null    datetime64[ns]
 1   program_code                                  330 non-null    object        
 2   program_code_description                      330 non-null    object        
 3   project_number                                330 non-null    object        
 4   recipient_project_number                      330 non-null    object        
 5   project_title                                 330 non-null    object        
 6   county_code                                   330 non-null    int64         
 7   congressional_district                        330 non-null    object        
 8   improvement_type                              330 non-null    object  

In [57]:
# Relabel the agency name and the parsed locode as "implementing agency" fields.
proj_all = proj_all.rename(
    columns=dict(
        agency_name="implementing_agency",
        locode="implementing_agency_locode",
    )
)

In [58]:
# These columns come from the locodes lookup; if a different locode list is
# used, the set of columns to drop here will change.
# Reassigning with errors="ignore" (instead of inplace=True) lets this cell be
# re-run without a KeyError once the columns are gone. `axis=1` was redundant
# alongside `columns=`.
proj_all = proj_all.drop(
    columns=['active_e76s______7_12_2021_', 'mpo_locode_fads', 'agency_locode'],
    errors="ignore",
)

In [59]:
proj_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 330 entries, 0 to 329
Data columns (total 18 columns):
 #   Column                                        Non-Null Count  Dtype         
---  ------                                        --------------  -----         
 0   fmis_transaction_date                         330 non-null    datetime64[ns]
 1   program_code                                  330 non-null    object        
 2   program_code_description                      330 non-null    object        
 3   project_number                                330 non-null    object        
 4   recipient_project_number                      330 non-null    object        
 5   project_title                                 330 non-null    object        
 6   county_code                                   330 non-null    int64         
 7   congressional_district                        330 non-null    object        
 8   improvement_type                              330 non-null    object  

In [62]:
# Rows that did not pick up an agency from the locodes lookup.
# `implementing_agency` comes from the right side of the left join, so it is
# null both when the locode could not be parsed and when a parsed locode is
# missing from the lookup. The previous check on `implementing_agency_locode`
# (the left-side key) only caught the unparsable case.
# In the run recorded here, one row (field value 'S NON-MPO') did not match.
proj_all >> filter(_.implementing_agency.isnull())

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value,implementing_agency_locode,implementing_agency,district,county_name,rtpa_name,mpo_name
284,2022-04-26,Y001,NATIONAL HIGHWAY PERF IIJA,NBIS522,0000001453L5,STATEWIDE - IN SERVICE BRIDGES OWNED BY LOCAL AGENCIES FEDERALLY MANDATED BRIDGE INSPECTION PROGRAM,67,Cong Dist 3,49,Bridge Inspection and Bridge Related Training,14164800.0,S NON-MPO,,,,,,


In [63]:
proj_all.head()

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value,implementing_agency_locode,implementing_agency,district,county_name,rtpa_name,mpo_name
0,2022-06-01,YS30//YS30,HIGHWAY SAFETY IMP PROG IIJA,5253021,0720000168L,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,37,Cong Dist 43,17//21,Construction Engineering//Safety,1305800.0,L5253SCAG,5253.0,Hawthorne,7.0,Los Angeles County,Los Angeles County Metropolitan Transportation Auth.,Southern California Association Of Governments
1,2022-09-15,Y001,NATIONAL HIGHWAY PERF IIJA,NBIL539,0815000215L,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",71,Cong Dist 8,44,Other,301838.32,L5457SCAG,5457.0,Yucaipa,8.0,San Bernardino County,San Bernardino Associated Governments,Southern California Association Of Governments
2,2022-07-25,YS30//YS30,HIGHWAY SAFETY IMP PROG IIJA,5132050,0421000374L,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",95,Cong Dist 3,17//21,Construction Engineering//Safety,935150.0,L5132MTC,5132.0,Fairfield,4.0,Solano County,Metropolitan Transportation Commission,Metropolitan Transportation Commission
3,2022-03-07,Y233//Y233//Y233//Y233,STBG IIJA OFF-SYSTEM BRIDGE,5914078,0100020461L,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),33,Cong Dist 3,11//15//16//17,Bridge Replacement - No Added Capacity//Preliminary Engineering//Right of Way//Construction Engineering,1615000.0,L5914NON-MPO,5914.0,Lake County,1.0,Lake County,Lake County/City Area Planning Council,NON-MPO
4,2022-07-18,YS30//YS30,HIGHWAY SAFETY IMP PROG IIJA,5094071,0419000558L,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,95,Cong Dist 3,17//21,Construction Engineering//Safety,2027700.0,L5094MTC,5094.0,Vacaville,4.0,Solano County,Metropolitan Transportation Commission,Metropolitan Transportation Commission


In [65]:
# Distinct implementing agencies, in order of first appearance.
proj_all['implementing_agency'].unique().tolist()

['Hawthorne',
 'Yucaipa',
 'Fairfield',
 'Lake County',
 'Vacaville',
 'Benicia',
 'Pico Rivera',
 'Marin County',
 'Los Banos',
 'Santa Clara County',
 'Santa Barbara County',
 'Fresno County',
 'Santa Ana',
 'Stockton',
 'Caltrans',
 'Sacramento',
 'San Benito County',
 'Pinole',
 'Sacramento County',
 'Merced County',
 'Shasta County',
 'Livingston',
 'El Dorado County',
 'Sanger',
 'Palmdale',
 'Tulare County',
 'Coachella',
 'Bakersfield',
 'Colton',
 'Visalia',
 'Rialto',
 'Metropolitan Transportation Commission',
 'Bellflower',
 'Belmont',
 'Larkspur',
 'Monterey County',
 'Madera County',
 'Moraga',
 'Orinda',
 'Redding',
 'Cathedral City',
 'Alameda',
 'Clovis',
 'Gardena',
 'Mission Viejo',
 'Pleasanton',
 'Fresno',
 'Butte County',
 'Rohnert Park',
 'Alameda County Transportation Commission',
 'Yolo County',
 'Stanislaus County',
 'Placer County',
 'Contra Costa County',
 'Mariposa County',
 'Santa Barbara',
 'Nevada County',
 'Calaveras County',
 'Long Beach',
 'Los Angeles

## Writing to GCS

In [29]:
#proj_all.to_csv(f"{GCS_FILE_PATH}/FMIS_projects_wip.csv")

## Adding Place Names (optional)
* We have a list mapping city names to counties from [Caltrans Place Names](https://dot.ca.gov/-/media/dot-media/programs/research-innovation-system-information/documents/place-names/2019-place-names-in-california-a11y.pdf).


In [30]:
# city_place_names = (to_snakecase(pd.read_excel('gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/2020-place-names-locode.xlsx', sheet_name=0)))

In [31]:
# city_place_names.sample()

In [32]:
# city_place_names.drop(columns =['unnamed:_1', 'unnamed:_3', 'unnamed:_4','unnamed:_6','unnamed:_7', 'date_of_incorporation',
#                                'city_name_abbr_','name','dist_', 'co_'], axis=1, inplace=True)

In [33]:
# (pd.merge(proj_all, city_place_names, left_on='agency_locode', right_on='ct_city_code', how='left', indicator=True))._merge.value_counts()

In [34]:
# proj_all1 = (pd.merge(proj_all, city_place_names, left_on='agency_locode', right_on='ct_city_code', how='left', indicator='City'))

In [35]:
# proj_all1.sample(2)

In [36]:
# proj_all1>>filter(_.City!='both')

In [37]:
# county_place_names = (to_snakecase(pd.read_excel('gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/2020-place-names-locode.xlsx', sheet_name=1)))

In [38]:
# county_place_names

## Project Location

In [67]:
# Keep only the columns needed to reason about where each project is located.
location = proj_all[['implementing_agency', 'county_name', 'project_title']]

In [68]:
location.sample()

Unnamed: 0,implementing_agency,county_name,project_title
115,Fullerton,Orange County,FOURTEEN (14) SELECT INTERSECTIONS CITYWIDE INSTALL FOURTEEN (14) PEDESTRIAN WARNING FLASHING BEACONS CITYWIDE


## Project Title

In [73]:
# Narrow the frame to the columns used when working on the project title.
title = proj_all[
    ['implementing_agency', 'program_code_description', 'project_title', 'program_code']
]

In [72]:
title

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code
0,Hawthorne,HIGHWAY SAFETY IMP PROG IIJA,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,YS30//YS30
1,Yucaipa,NATIONAL HIGHWAY PERF IIJA,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",Y001
2,Fairfield,HIGHWAY SAFETY IMP PROG IIJA,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",YS30//YS30
3,Lake County,STBG IIJA OFF-SYSTEM BRIDGE,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),Y233//Y233//Y233//Y233
4,Vacaville,HIGHWAY SAFETY IMP PROG IIJA,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,YS30//YS30
...,...,...,...,...
325,Indio,NATIONAL HIGHWAY PERF IIJA,"WESTBOUND INDIO BOULEVARD OVER WHITEWATER RIVER, BR. NO. 56C-0292 SEISMIC RETROFIT AND SCOUR COUNTERMEASURES",Y001//Y001
326,Kingsburg,CONGESTION MITIGATION IIJA,WESTSIDE OF 18TH AVE FROM STROUD AVE TO KLEPPER ST CONSTRUCT NEW SIDEWALKS,Y400//Y400
327,Santa Ana,TRANS ALTERNATIVES >200K IIJA,"WILLITS STREET FROM FAIRVIEW STREET TO E/S OF RAITT STREET INSTALL MEDIAN, PARKING-PROTECTED BICYCLE LANES, AND DEDICATED BICYCLE SIGNAL HEADS (TC)",Y301//Y301
328,San Joaquin County,STBG IIJA OFF-SYSTEM BRIDGE,WIMER ROAD OVER INDIAN CREEK NORTH BRANCH (BRIDGE 29C0303) BRIDGE REPLACEMENT (TC),Y233//Y233
