# Add Project Titles for Additional Projects

In [2]:
# ! pip install nltk
# ! pip install textblob

In [1]:
import numpy as np
import pandas as pd
from siuba import *

from shared_utils import geography_utils
from dla_utils import _dla_utils

from calitp import to_snakecase

import utils



In [2]:
# Notebook display settings, applied in one call as (option, value) pairs:
# show up to 100 columns and do not truncate the text inside a cell.
pd.set_option(
    "display.max_columns", 100,
    "display.max_colwidth", None,
)


In [3]:
# GCS folder prefix that the notebook's file paths are built from.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/dla/dla-iija"

In [4]:
# Read the 'IIJA ER01' sheet of the FMIS workbook stored under GCS_FILE_PATH,
# then pass the frame through to_snakecase (the preview further down shows
# snake_case column names).
fmis_workbook_uri = f"{GCS_FILE_PATH}/FMIS_ER01_IIJA_Projects_20230111.xlsx"
proj = to_snakecase(pd.read_excel(fmis_workbook_uri, sheet_name="IIJA ER01"))

In [5]:
# Preview four randomly drawn rows; no seed is set, so the rows differ between runs.
proj.sample(n=4)

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value
144,44602,ER01,EMERGENCY REL 2022 SUPPLEMENT,32L0230,0117000307L1,HUMBOLDT CO ON HERRICK AVENUE PM 0.74 PERMANENT RESTORATION,23,Cong Dist 2,4,4R - No Added Capacity,164.61,L5904NON-MPO
99,44599,ER01,EMERGENCY REL 2022 SUPPLEMENT,32L0012,0517000117L,IN THE COUNTY OF SANTA CRUZ ON SOQUEL-SAN JOSE PM 5.91 REPAIR SLIP OUT EMBANKMENT AND ROADWAY,87,Cong Dist 20,15,Preliminary Engineering,57799.55,L5936AMBAG
408,44768,ER01,EMERGENCY REL 2022 SUPPLEMENT,40A0101,0721000234L,"MALIBU CANYON ROAD FROM CITY OF MALIBU (MILE MARKER 3.55) TO LAS VIRGENES ROAD (MILE MARKER 0.00) REMOVAL OF DEBRIS, ROCK AND SLOUGHAGE MATERIAL ON T",37,Cong Dist 33,15,Preliminary Engineering,10101.27,L5953SCAG
223,44624,ER01,EMERGENCY REL 2022 SUPPLEMENT,32L0540,0219000060L,"ON GILMAN ROAD IN SHASTA COUNTY, AT SEVERAL LOCATIONS FROM 0.7 TO 6.3 MILES EAST OF INTERSTATE 5. EMERGENCY OPENING RESTORE WASHED OUT SHOULDERS, PLA",89,Cong Dist 1,15,Preliminary Engineering,5485.15,L5906SHASTA


### Aggregated project list

In [6]:
# Aggregated project list, built inside-out with the helpers in utils:
#   1. identify_agency    -> adds the known agency name to the df, looked up from
#                            the recipient-defined text field
#   2. condense_df        -> presumably collapses the rows to one per project
#                            (the preview shows "4 | 15"-style joined values) -- TODO confirm
#   3. get_new_desc_title -> string parser that produces the new title
agg_proj_list = utils.get_new_desc_title(
    utils.condense_df(
        utils.identify_agency(proj, "summary_recipient_defined_text_field_1_value")
    )
)

In [7]:
# Preview four randomly drawn aggregated rows; no seed is set, so the rows differ between runs.
agg_proj_list.sample(n=4)

Unnamed: 0,fmis_transaction_date,project_number,implementing_agency,summary_recipient_defined_text_field_1_value,program_code,program_code_description,recipient_project_number,improvement_type,improvement_type_description,project_title,obligations_amount,congressional_district,district,county_code,county_name,implementing_agency_locode,rtpa_name,mpo_name,project_title_new
100,44623,40A0048,Tehama County,L5908NON-MPO,ER01,EMERGENCY REL 2022 SUPPLEMENT,0220000005L,4 | 15,4R - No Added Capacity | Preliminary Engineering,BOWMAN ROAD FROM I-5 TO HWY 36W. CA19-2 EO WORK TO REMOVE AND HAUL DOWNED TREES AND BRANCH FROM BOWMAN RIGHT-OF-WAY FROM CA19-2 DISASTER.,16035.27,Cong Dist 1,2.0,103,Tehama County,5908.0,Tehama County Transportation Commission,NON-MPO,Road Construction in Tehama County
182,44743,32D0002,Santa Cruz County,L5936AMBAG,ER01,EMERGENCY REL 2022 SUPPLEMENT,0517000111L,4 | 15 | 17,4R - No Added Capacity | Preliminary Engineering | Construction Engineering,"SOQUEL-SAN JOSE RD PM 5.36 RECONSTRUCT ROADWAY, SHOULDER AND STABILIZE EMBANKMENT",350557.2,Cong Dist 18,5.0,87,Santa Cruz County,5936.0,Santa Cruz County Regional Transportation Commission,Association of Monterey Bay Area Governments,Reconstruct Shoulders in Santa Cruz County
85,44616,40A0002,Santa Cruz County,L5936AMBAG,ER01,EMERGENCY REL 2022 SUPPLEMENT,0519000144L,15 | 16,Preliminary Engineering | Right of Way,SOQUEL - SAN JOSE ROAD PM 1.25 EMBANKMENT RECONSTRUCTION,438824.0,Cong Dist 20,5.0,87,Santa Cruz County,5936.0,Santa Cruz County Regional Transportation Commission,Association of Monterey Bay Area Governments,Reconstruct Reconstruct Embankment in Santa Cruz County
224,44812,31PX001,California,S ER NONE,ER01,EMERGENCY REL 2022 SUPPLEMENT,0317000236S,6 | 15 | 17,4R - Restoration & Rehabilitation | Preliminary Engineering | Construction Engineering,IN YOLO COUNTY ON ROUTE 16 APPROXIMATELY 1.2 MILES WEST OF THE COLUSA/YOLO COUNTY LINE. CONSTRUCT SOLDIER PILE WALL,6095700.0,Cong Dist 3,3.0,113,Yolo County,,,,Road Restoration & Rehabilitation in Yolo County


In [10]:
## spot check: look at the new titles that contain "Pave" (uncomment the line below to run)
# agg_proj_list>>filter(_.project_title_new.str.contains("Pave"))

### Full project list

In [8]:
# Row-level list: start from the raw FMIS rows with the agency information added.
full_proj_list = utils.identify_agency(
    proj, "summary_recipient_defined_text_field_1_value"
)

# The new titles are generated on the condensed frame, not on the full one.
aggdf = utils.get_new_desc_title(utils.condense_df(full_proj_list))

# Build a project_number -> project_title_new look-up and map it back onto
# every row of the full list (the column is added in place).
proj_title_mapping = dict(zip(aggdf["project_number"], aggdf["project_title_new"]))
full_proj_list["project_title_new"] = full_proj_list["project_number"].map(
    proj_title_mapping
)


In [10]:
# Alphabetical list of the distinct generated titles, for eyeballing the parser output.
sorted(full_proj_list["project_title_new"].unique())

[' Construction Engineering Projects in Humboldt County',
 ' Construction Engineering Projects in Mendocino County',
 ' Erosion Countermeasures in Cathedral City',
 ' Guardrails in Paradise',
 ' Maintenance Resurfacing Statewide',
 ' New Construction Roadway in Contra Costa County',
 ' Overhead in Alameda County',
 ' Plant Vegetation in Marin County',
 ' Preliminary Engineering Projects in Butte County',
 ' Preliminary Engineering Projects in Calaveras County',
 ' Preliminary Engineering Projects in Glenn County',
 ' Preliminary Engineering Projects in Humboldt County',
 ' Preliminary Engineering Projects in Los Angeles County',
 ' Preliminary Engineering Projects in Marin County',
 ' Preliminary Engineering Projects in Mariposa County',
 ' Preliminary Engineering Projects in Mendocino County',
 ' Preliminary Engineering Projects in Napa County',
 ' Preliminary Engineering Projects in Paradise',
 ' Preliminary Engineering Projects in Placer County',
 ' Preliminary Engineering Projects 

In [11]:
full_proj_list.sample(4)

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value,implementing_agency_locode,implementing_agency,district,county_name,rtpa_name,mpo_name,project_title_new
136,44627,ER01,EMERGENCY REL 2022 SUPPLEMENT,15A7001,0521000177L,RIVER ROAD AT FAIRVIEW PERMANENT RESTORATION,53,Cong Dist 20,4,4R - No Added Capacity,1587165.0,L5944AMBAG,5944.0,Monterey County,5.0,Monterey County,Transportation Agency For Monterey County,Association of Monterey Bay Area Governments,Road Restoration & Rehabilitation in Monterey County
250,44819,ER01,EMERGENCY REL 2022 SUPPLEMENT,32L0111,0117000174L,"MATTOLE ROAD (F3C010) PM 43.62 GRIND EXISTING ROADWAY, RECOMPACT/AUGMENT ROAD BASE AND SURFACE.",23,Cong Dist 2,17,Construction Engineering,4591.94,L5904NON-MPO,5904.0,Humboldt County,1.0,Humboldt County,Humboldt County Association of Governments,NON-MPO,Road Construction in Humboldt County
37,44601,ER01,EMERGENCY REL 2022 SUPPLEMENT,32L0240,0117000317L,MATTOLE ROAD PM 13.67 PERMANENT RESTORATION,23,Cong Dist 2,17,Construction Engineering,61063.73,L5904NON-MPO,5904.0,Humboldt County,1.0,Humboldt County,Humboldt County Association of Governments,NON-MPO,Road Restoration & Rehabilitation in Humboldt County
262,44824,ER01,EMERGENCY REL 2022 SUPPLEMENT,40A0078,0820000058L,"INDIAN CANYON DR @ WHITEWATER WASH REMOVAL OF MUD AND DEBRIS COVERING THE ENTIRE WIDTH OF ROADWAY, FOR A SMALLER PORTION OF COVERED AREA AS PART OF E",65,Cong Dist 36,4,4R - No Added Capacity,91351.0,L5282SCAG,5282.0,Palm Springs,8.0,Riverside County,Riverside County Transportation Commission,Southern California Association Of Governments,Road Construction in Palm Springs


## export: 

In [12]:
# Run the full list through utils.title_column_names before export.
# NOTE(review): this rebinds full_proj_list, so re-running the cell feeds the
# already-processed frame back in -- confirm the helper tolerates that.
full_proj_list = full_proj_list.pipe(utils.title_column_names)

In [14]:
# full_proj_list.to_csv(f"{GCS_FILE_PATH}/FMIS_projects_ER01_all.csv")

In [15]:
# Run the aggregated list through utils.title_column_names before export.
# NOTE(review): this rebinds agg_proj_list, so re-running the cell feeds the
# already-processed frame back in -- confirm the helper tolerates that.
agg_proj_list = agg_proj_list.pipe(utils.title_column_names)

In [17]:
# agg_proj_list.to_csv(f"{GCS_FILE_PATH}/FMIS_projects_ER01_agg.csv")