# Run Functions to Add Information to Projects

In [1]:
# ! pip install nltk

In [2]:
import numpy as np
import pandas as pd
from siuba import *

from calitp_data_analysis.sql import to_snakecase

from shared_utils import geography_utils

import dla_utils

import _data_utils
import _script_utils


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas


In [3]:
# Notebook display settings: show up to 100 columns and never truncate cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_colwidth", None)

## Read in Data and function development

In [4]:
# Base GCS location used below for reading the FMIS workbook (and for the optional exports).
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/dla/dla-iija"

In [5]:
## swap in the newest workbook name here whenever a new data file arrives
fmis_workbook = f"{GCS_FILE_PATH}/FMIS_Projects_Universe_IIJA_Reporting_062923_ToDLA.xlsx"

# Read the workbook, then pass the frame through to_snakecase.
proj = to_snakecase(pd.read_excel(fmis_workbook))

In [6]:

# Leave the frame loaded into `proj` bound to its original name and store the
# result under a new one, so re-running this cell never feeds add_new_codes
# its own output.
proj_coded = _data_utils.add_new_codes(proj)

## function that adds known agency name to df
df = _script_utils.identify_agency(
    proj_coded, "summary_recipient_defined_text_field_1_value"
)

In [7]:
# Spot-check three rows; the fixed seed keeps the displayed sample stable across re-runs.
df.sample(3, random_state=0)

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,project_status_description,project_description,improvement_type,improvement_type_description,total_cost_amount,obligations_amount,summary_recipient_defined_text_field_1_value,implementing_agency_locode,implementing_agency,district,county_name,rtpa_name,mpo_name
364,2022-08-01,Y400,Congestion Mitigation & Air Quality Improvement,5060385,0621000109L,"MCKINLEY AND BLYTHE AVENUES INTERSECTION; AND MCKINELY AVENUE (NORTHSIDE) FROM CECELIA AVE TO 400 FT EAST OF BLYTHE AVENUE INSTALL TRAFFIC SIGNALS, B",19,Cong Dist 16,Active,"MCKINLEY AND BLYTHE AVENUES INTERSECTION; AND MCKINELY AVENUE (NORTHSIDE) FROM CECELIA AVE TO 400 FT EAST OF BLYTHE AVENUE INSTALL TRAFFIC SIGNALS, BIKE LANE, STREETLIGHTS, STORM DRAIN, SIGNING AND STRIPING, LEFT TURN POCKET, CONCRETE SIDEWALKS, CURB, GUTTER AND CURB RAMPS.",16,Right of Way,381000.0,265000.0,L5060COFCG,5060.0,Fresno,6.0,Fresno County,Council of Fresno County Governments,Council Of Fresno County Goverments
3162,2023-06-28,ER03,Emergency Relieve Funding,39A9004,0120000030S,"MENDOCINO COUNTY NEAR BOONVILLE FROM 0.1 MILE WEST OF SINGLEY CATTLEPASS TO 1.3MILES WEST OF SODA CREEK BRIDGE CONSTRUCT GROUND ANCHOR WALL, PLACE RS",45,Cong Dist 2,Active,"ON STATE ROUTE: 253. MENDOCINO COUNTY NEAR BOONVILLE FROM 0.1 MILE WEST OF SINGLEY CATTLEPASS TO 1.3MILES WEST OF SODA CREEK BRIDGE CONSTRUCT GROUND ANCHOR WALL, PLACE RSP, PLACE HMA AND AB",21,Safety,818597.31,724704.19,S NON-MPO,,California,1.0,Mendocino County,,
383,2022-05-10,Y001,National Highway Performance Program (NHPP),X029141,0618000019S,KERN COUNTY IN THE CITY OF BAKERSFIELD ON ROUTE 99 FROM 0.3 MILE SOUTH OF BELLE TERRACE OVERCROSSING TO 0.1 MILE NORTH OF ROUTE 58; ALSO ON ROUTE 58,29,Cong Dist 23,Active,"ON STATE ROUTE: 99. KERN COUNTY IN THE CITY OF BAKERSFIELD ON ROUTE 99 FROM 0.3 MILE SOUTH OF BELLE TERRACE OVERCROSSING TO 0.1 MILE NORTH OF ROUTE 58; ALSO ON ROUTE 58 FROM ROUTE 99 TO 0.2 MILE EAST OF ROUTE 99. CONSTRUCT AUXILIARY LANE, RECONSTRUCT OVERCROSSING AND REALIGN AND CONSTRUCT CONNECTOR. (TC)",11,Bridge Replacement - No Added Capacity,21637400.0,20213400.0,S KCOG,,California,9.0,Kern County,,


## Test & Export

In [8]:
### push the data through the remainder of the script;
### full_or_agg="agg" returns the dataset aggregated at the project and program code
agg = _script_utils.get_clean_data(
    df,
    full_or_agg="agg",
)



In [9]:
# Review 32 aggregated rows; the fixed seed makes the same rows appear on every re-run.
agg.sample(32, random_state=0)

Unnamed: 0,fmis_transaction_date,project_number,implementing_agency,summary_recipient_defined_text_field_1_value,program_code,program_code_description,recipient_project_number,improvement_type,improvement_type_description,old_project_title_desc,obligations_amount,congressional_district,district,county_code,county_name,county_name_abbrev,implementing_agency_locode,rtpa_name,mpo_name,new_project_title,new_description_col
1344,2023-06-15,5444019,Orinda,L5444MTC,Y001,National Highway Performance Program (NHPP),0419000097L,14|17,Bridge Rehabilitation - No Added Capacity|Construction Engineering,MINER ROAD BRIDGE OVER SAN PABLO CREEK (BR #28C0330) SEISMIC RETROFIT,281136,|11|,|04|,13,Contra Costa County,|CC|,5444.0,Metropolitan Transportation Commission,Metropolitan Transportation Commission,Seismic Retrofit in Orinda,"Seismic Retrofit in Orinda, part of the National Highway Performance Program (NHPP). (Federal Project ID: 5444019)."
963,2023-03-29,5929242,San Joaquin County,L5929SJCOG,Y120,Bridge Formula Program,1012000117L,17|10,Construction Engineering|Bridge Replacement - Added Capacity,SEXTON ROAD OVER S. SAN JOAQUIN IRRIGATION DISTRICT (SSJID) CANAL (BRIDGE 29C0319) BRIDGE REPLACEMENT (TC),2797500,|10|09|,|10|,77,San Joaquin County,|SJQ|,5929.0,San Joaquin Council of Governments,San Joaquin Council Of Goverments,Replace Bridge in San Joaquin County,"Replace Bridge in San Joaquin County, part of the Bridge Formula Program. (Federal Project ID: 5929242)."
1047,2023-05-02,5361023,Novato,L5361MTC,Y001,National Highway Performance Program (NHPP),0400002094L,13|17,Bridge Rehabilitation - Added Capacity|Construction Engineering,GRANT AVENUE OVER NOVATO CREEK. BR.# 27C0021 BRIDGE REHABILITATION AND SCOUR MITIGATION,3219836,|02|,|04|,41,Marin County,|MRN|,5361.0,Metropolitan Transportation Commission,Metropolitan Transportation Commission,Erosion Countermeasures in Novato,"Erosion Countermeasures in Novato, part of the National Highway Performance Program (NHPP). (Federal Project ID: 5361023)."
454,2022-09-09,X001665,California,S MTC,YS32,Section 164 Penalties - Use for HSIP Activities,0415000090S,17|21,Construction Engineering|Safety,ALAMEDA COUNTY AT VARIOUS LOCATIONS INSTALL ACCESSIBLE PEDESTRIAN SIGNAL AND RE-STRIPE CROSSWALK.,491500,|01|,|nan|,999,Statewide,|NA|,,,,Install Pedestrian Safety Improvements in Alameda County,"Install Pedestrian Safety Improvements in Alameda County, part of the Section 164 Penalties - Use for HSIP Activities. (Federal Project ID: X001665)."
642,2022-12-08,P051033,California,S SACOG,Y240,Surface Transportation Block Grant,0312000054S,15,Preliminary Engineering,IN THE CITY OF SACRAMENTO AT THE AMERICAN RIVER BRIDGE NO.24-0003 FROM NORTH OF B STREET UNDERPASS TO NORTH OF EXPOSITION BOULEVARD OVERCROSSING. WID,1800700,|06|,|03|,67,Sacramento County,|SAC|,,,,Preliminary Engineering Projects in Sacramento County,"Preliminary Engineering Projects in Sacramento County, part of the Surface Transportation Block Grant. (Federal Project ID: P051033)."
1085,2023-05-05,P013039,California,S MTC,YS30,Highway Safety Improvement Program (HSIP),0415000356S,17|21,Construction Engineering|Safety,ALAMEDA COUNTY OUTTER BARRIER SEPARTION BETWEEN ROUTE 13 AND TWO FRONTAGE ROADS,5673500,|13|,|04|,1,Alameda County,|ALA|,,,,Safety Improvements in Alameda County,"Safety Improvements in Alameda County, part of the Highway Safety Improvement Program (HSIP). (Federal Project ID: P013039)."
83,2022-03-07,5906119,Shasta County,L5906SHASTA,Y001,National Highway Performance Program (NHPP),0214000079L,11|17,Bridge Replacement - No Added Capacity|Construction Engineering,"GAS POINT ROAD BRIDGE (06C-0183) OVER NO NAME DITCH, 1.9 MILES WEST OF I-5. BR.# 06C0183 BRIDGE REPLACEMENT",831795,|01|,|02|,89,Shasta County,|SHA|,56.0,Shasta County Regional Transportation Planning Agency,Shasta County Regional Transportation Planning Age,Replace Bridge in Shasta County,"Replace Bridge in Shasta County, part of the National Highway Performance Program (NHPP). (Federal Project ID: 5906119)."
444,2022-09-09,0801120,California,S MTC,Y001,National Highway Performance Program (NHPP),0419000294S,14|17,Bridge Rehabilitation - No Added Capacity|Construction Engineering,THE CITY AND COUNTY OF SAN FRANCISCO FROM 16TH STREET TO 4TH STREET. PAINT STEEL STRUCTURE,16810131,|12|,|04|,75,San Francisco County,|SFO|,,,,Bridge Rehabilitation in San Francisco County,"Bridge Rehabilitation in San Francisco County, part of the National Highway Performance Program (NHPP). (Federal Project ID: 0801120)."
306,2022-07-21,39AS001,California,S ER NONE,ER01,Emergency Relieve Funding,0119000079S,6|17,4R - Restoration & Rehabilitation|Construction Engineering,IN MENDOCINO COUNTY NEAR WILLITS AT 0.4 MILES WEST OF HAEL CREEK BR#10-129 REPAIR SLIPOUT,787800,|02|,|01|,45,Mendocino County,|MEN|,,,,Road Restoration & Rehabilitation in Mendocino County,"Road Restoration & Rehabilitation in Mendocino County, part of the Emergency Relieve Funding. (Federal Project ID: 39AS001)."
1495,2023-06-26,5004049,San Diego,L5004SANDAG,Y908,Bridge Replacement and Rehabilitation Program,11955780L,10|17,Bridge Replacement - Added Capacity|Construction Engineering,"WEST MISSION BAY DRIVE OVER THE SAN DIEGO RIVER BRIDGE REPLACEMENT, BR. NO. 57C-0023",9711359,|52|,|11|,73,San Diego County,|SDG|,4.0,San Diego Association of Governments,San Diego Association Of Governments,Replace Bridge in San Diego,"Replace Bridge in San Diego, part of the Bridge Replacement and Rehabilitation Program. (Federal Project ID: 5004049)."


In [10]:
# #check project names

# sorted(list(agg.new_project_title))

In [11]:
# Number of rows in the aggregated dataset.
len(agg.index)

1612

In [12]:
### pretty print the column names (same `agg` name is kept for the export step)
agg = agg.pipe(_script_utils.title_column_names)

In [13]:
#### export new file to the GCS bucket
# agg.to_csv(f"{GCS_FILE_PATH}/FMIS_Projects_Universe_IIJA_Reporting_062923_rerun.csv")

In [14]:
# ###test full. PASS title, no concat and 1241 rows
# full = _script_utils.get_clean_data(df, full_or_agg = 'full')

### Check a single project

In [15]:
# full>>filter(_["Recipient Project Number"]=="0118000055L")

In [16]:
# full = _script_utils.title_column_names(full)

In [20]:
# full.to_csv(f"{GCS_FILE_PATH}/FMIS_Projects_Universe_IIJA_Reporting_rerun_5-29-23_all.csv")