# Add Agency and Agency Information to FMIS IIJA

In [1]:
import numpy as np
import pandas as pd
from siuba import *

from shared_utils import geography_utils
from dla_utils import _dla_utils

from calitp import to_snakecase

import utils



In [2]:
pd.set_option("display.max_columns", 100)
pd.set_option('display.max_colwidth', None)


In [3]:
GCS_FILE_PATH  = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'

## Read In Data

In [105]:
## other county locodes

# county = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/Copy of County.xlsx",
#                                    sheet_name='locode1',
#                                    header=[0]))

In [6]:
proj = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/CopyofFMIS_Projects_Universe_IIJA_Reporting_4.xls", 
                           # sheet_name='FMIS 5 Projects  ', header=[3]
                           sheet_name='IIJA-combined',
                           # sheet_name='FMIS 5 Projects  ',
                           ))

In [9]:
# number of entries in the county locode list
len(county)

1072

In [10]:
county.sample(4)

Unnamed: 0,agency_locode,agency_name,district,mpo,county
859,6312,Access Services,7.0,SCAG,Los Angeles County
1067,7511,Santa Ynez Band of Chumash Indians,5.0,NON-MPO,Santa Barbara County
787,6240,Napa Valley Wine Train,4.0,MTC,Napa County
772,6225,El Dorado County Transit Authority,3.0,SACOG,El Dorado County


In [12]:
proj.drop(columns =['unnamed:_0', 'unnamed:_13', 'total'], axis=1, inplace=True)

In [13]:
proj.sample()

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value
94,2022-08-11,Y400,CONGESTION MITIGATION IIJA,6164024,0822000089L,"EASTERN RIVERSIDE COUNTY (18 CORRIDORS): MONTEREY, COOK, PALM DR, BOB HOPE, FRED WARING, DINAH SHORE, GENE AUTRY, DATE PALM, INDIO BLVD, JEFFERSON, P",65,Cong Dist 36,1,New Construction Roadway,30886000.0,L6164SCAG


## Get Locode Substring

In [14]:
string = proj['summary_recipient_defined_text_field_1_value'].iloc[0]

In [15]:
string

'L5253SCAG'

In [16]:
print(string.find('5'))
print(string.find('3'))

1
4


In [17]:
# need to extract string from position 1-4. 

In [18]:
#proj['locode'] = proj.summary_recipient_defined_text_field_1_value.apply(lambda x: x[1:5])

In [19]:
#proj.head()

In [20]:
county.sample()

Unnamed: 0,agency_locode,agency_name,district,mpo,county
776,6229,Hayward Area Recreation And Park District,4.0,MTC,Alameda County


## Merge data on Locode

In [21]:
proj_all = utils.add_name_from_locode(proj, 'summary_recipient_defined_text_field_1_value')

In [24]:
proj_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 330 entries, 0 to 329
Data columns (total 18 columns):
 #   Column                                        Non-Null Count  Dtype         
---  ------                                        --------------  -----         
 0   fmis_transaction_date                         330 non-null    datetime64[ns]
 1   program_code                                  330 non-null    object        
 2   program_code_description                      330 non-null    object        
 3   project_number                                330 non-null    object        
 4   recipient_project_number                      330 non-null    object        
 5   project_title                                 330 non-null    object        
 6   county_code                                   330 non-null    int64         
 7   congressional_district                        330 non-null    object        
 8   improvement_type                              330 non-null    object  

In [28]:
# one locode did not match
proj_all>>filter(_.implementing_agency_locode.isnull())

Unnamed: 0,fmis_transaction_date,program_code,program_code_description,project_number,recipient_project_number,project_title,county_code,congressional_district,improvement_type,improvement_type_description,obligations_amount,summary_recipient_defined_text_field_1_value,implementing_agency_locode,implementing_agency,district,county_name,rtpa_name,mpo_name
284,2022-04-26,Y001,NATIONAL HIGHWAY PERF IIJA,NBIS522,0000001453L5,STATEWIDE - IN SERVICE BRIDGES OWNED BY LOCAL AGENCIES FEDERALLY MANDATED BRIDGE INSPECTION PROGRAM,67,Cong Dist 3,49,Bridge Inspection and Bridge Related Training,14164800.0,S NON-MPO,,,,,,


In [30]:
list(proj_all.implementing_agency.unique())

['Hawthorne',
 'Yucaipa',
 'Fairfield',
 'Lake County',
 'Vacaville',
 'Benicia',
 'Pico Rivera',
 'Marin County',
 'Los Banos',
 'Santa Clara County',
 'Santa Barbara County',
 'Fresno County',
 'Santa Ana',
 'Stockton',
 'Caltrans',
 'Sacramento',
 'San Benito County',
 'Pinole',
 'Sacramento County',
 'Merced County',
 'Shasta County',
 'Livingston',
 'El Dorado County',
 'Sanger',
 'Palmdale',
 'Tulare County',
 'Coachella',
 'Bakersfield',
 'Colton',
 'Visalia',
 'Rialto',
 'Metropolitan Transportation Commission',
 'Bellflower',
 'Belmont',
 'Larkspur',
 'Monterey County',
 'Madera County',
 'Moraga',
 'Orinda',
 'Redding',
 'Cathedral City',
 'Alameda',
 'Clovis',
 'Gardena',
 'Mission Viejo',
 'Pleasanton',
 'Fresno',
 'Butte County',
 'Rohnert Park',
 'Alameda County Transportation Commission',
 'Yolo County',
 'Stanislaus County',
 'Placer County',
 'Contra Costa County',
 'Mariposa County',
 'Santa Barbara',
 'Nevada County',
 'Calaveras County',
 'Long Beach',
 'Los Angeles

## Writing to GCS

In [31]:
#proj_all.to_csv(f"{GCS_FILE_PATH}/FMIS_projects_wip.csv")

## Adding Place Names (can do)
* we have a list of city names to county from [Caltrans PlaceNames](https://dot.ca.gov/-/media/dot-media/programs/research-innovation-system-information/documents/place-names/2019-place-names-in-california-a11y.pdf)


In [32]:
# city_place_names = (to_snakecase(pd.read_excel('gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/2020-place-names-locode.xlsx', sheet_name=0)))

In [33]:
# city_place_names.sample()

In [34]:
# city_place_names.drop(columns =['unnamed:_1', 'unnamed:_3', 'unnamed:_4','unnamed:_6','unnamed:_7', 'date_of_incorporation',
#                                'city_name_abbr_','name','dist_', 'co_'], axis=1, inplace=True)

In [35]:
# (pd.merge(proj_all, city_place_names, left_on='agency_locode', right_on='ct_city_code', how='left', indicator=True))._merge.value_counts()

In [36]:
# proj_all1 = (pd.merge(proj_all, city_place_names, left_on='agency_locode', right_on='ct_city_code', how='left', indicator='City'))

In [37]:
# proj_all1.sample(2)

In [38]:
# proj_all1>>filter(_.City!='both')

In [39]:
# county_place_names = (to_snakecase(pd.read_excel('gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/2020-place-names-locode.xlsx', sheet_name=1)))

In [40]:
# county_place_names

## Project Location

In [41]:
location = (proj_all>>select(_.implementing_agency, _.county_name, _.project_title))

In [42]:
location.sample()

Unnamed: 0,implementing_agency,county_name,project_title
256,Sierra County,Sierra County,SALMON LAKE ROAD AT SALMON CREEK ~ 300 FT WEST OF GOLD LAKE HIGHWAY. BR.# 13C0053 BRIDGE REPLACEMENT (TC)


In [43]:
len(location)

330

## Project Title

In [44]:
#phrase extraction maybe: https://stackoverflow.com/questions/70995812/extract-keyword-from-sentences-in-a-pandas-text-column-using-nltk-and-or-regex

In [45]:
# ! pip install nltk
# ! pip install textblob

In [46]:
import nltk
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

In [47]:

import re

In [48]:
#subset df to work on title
title = proj_all>>select(_.implementing_agency, _.program_code_description, _.project_title, _.program_code)

In [49]:
title

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code
0,Hawthorne,HIGHWAY SAFETY IMP PROG IIJA,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,YS30//YS30
1,Yucaipa,NATIONAL HIGHWAY PERF IIJA,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",Y001
2,Fairfield,HIGHWAY SAFETY IMP PROG IIJA,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",YS30//YS30
3,Lake County,STBG IIJA OFF-SYSTEM BRIDGE,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),Y233//Y233//Y233//Y233
4,Vacaville,HIGHWAY SAFETY IMP PROG IIJA,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,YS30//YS30
...,...,...,...,...
325,Indio,NATIONAL HIGHWAY PERF IIJA,"WESTBOUND INDIO BOULEVARD OVER WHITEWATER RIVER, BR. NO. 56C-0292 SEISMIC RETROFIT AND SCOUR COUNTERMEASURES",Y001//Y001
326,Kingsburg,CONGESTION MITIGATION IIJA,WESTSIDE OF 18TH AVE FROM STROUD AVE TO KLEPPER ST CONSTRUCT NEW SIDEWALKS,Y400//Y400
327,Santa Ana,TRANS ALTERNATIVES >200K IIJA,"WILLITS STREET FROM FAIRVIEW STREET TO E/S OF RAITT STREET INSTALL MEDIAN, PARKING-PROTECTED BICYCLE LANES, AND DEDICATED BICYCLE SIGNAL HEADS (TC)",Y301//Y301
328,San Joaquin County,STBG IIJA OFF-SYSTEM BRIDGE,WIMER ROAD OVER INDIAN CREEK NORTH BRANCH (BRIDGE 29C0303) BRIDGE REPLACEMENT (TC),Y233//Y233


In [50]:
title.program_code_description.value_counts()

HIGHWAY SAFETY IMP PROG IIJA      75
STBG-URBANIZED >200K IIJA         61
NATIONAL HIGHWAY PERF IIJA        59
STBG IIJA OFF-SYSTEM BRIDGE       49
CONGESTION MITIGATION IIJA        37
SURFAC TRNSP BLK GRTS-FLX IIJA    18
TRANS ALTERNATIVES >200K IIJA     14
PROJ TO REDUCE PM 2.5 EMI IIJA    12
TRANSP ALTERNATIVES FLEX IIJA      4
TRANS ALTERN 50K-200K POP IIJA     1
Name: program_code_description, dtype: int64

In [52]:
clean_type_list_df = utils.get_list_of_words(title, "project_title")

[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [53]:
clean_type_list_df.value_counts()

bridge     178
road       142
avenue     115
ave         77
street      75
          ... 
jamacha      1
bellota      1
belmont      1
j            1
00           1
Length: 1729, dtype: int64

In [54]:
clean_type_list_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4910 entries, 0 to 4909
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       4910 non-null   object
dtypes: object(1)
memory usage: 38.5+ KB


In [55]:
clean_type_list_df.value_counts()

bridge     178
road       142
avenue     115
ave         77
street      75
          ... 
jamacha      1
bellota      1
belmont      1
j            1
00           1
Length: 1729, dtype: int64

In [56]:
#splitting up the types of words based on the most common words appearing in title name

fix_type = ['REPLACEMENT', 'INSTALL', 'CONSTRUCT', 'REPLACE', 'SIGNAL', 'TRAFFIC',
           'IMPROVEMENT', 'PEDESTRIAN', 'LANES', 'NEW', 'REHABILITATION',
           'UPGRADE', 'CLASS', 'BIKE', 'WIDEN', 'LANDSCAPING', 'SAFETY', 'RAISED', 
            'SEISMIC', 'SIGNAGE', 'RETROFIT', 'ADD', 'PLANNING', 'PAVE'
           'PREVENTIVE','MAINTENANCE', 'REHAB', 'RESURFACE', 'REPAIR', 'ROUNDABOUT']

area_type = ['BRIDGE', 'ROAD', 'RD', 'AVENUE', 'AVE', 'STREET' , 'ST',
             'FRACTURED', 'LANE', 'DRIVE', 'boulevard', 'BLVD',
             'INTERSECTION', 'intersections', 'WAY', 'DR', 'CURB', 'ROADWAY',
             'TRAIL', 'PATH', 'CREEK', 'RIVER', 'SIDEWALK', 'CORRIDOR', 'PARKWAY',
            'RAMPS', 'GUARDRAIL']

jurisdiction = ['CITY', 'COUNTY', 'STATE', 'UNINCORPORATED']

other = ['TC', 'EXISTING']

In [57]:
def tokenize(texts):
    return [nltk.tokenize.word_tokenize(t) for t in texts]

### using  np.concatenate

In [58]:
#! pip install more-itertools

In [59]:
#from more_itertools import split_after

In [61]:
title.sample()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code
289,Benicia,HIGHWAY SAFETY IMP PROG IIJA,"THE INTERSECTIONS OF MILITARY WEST AND WEST 5TH ST. AND HASTINGS DR. AND LONDON DR. PEDESTRIAN CROSSING IMPROVEMENTS, INCLUDING: ADA RAMPS RRFB SYSTE",YS30//YS30


In [62]:
text = title>>select (_.project_title)

In [63]:
text['summary'] = utils.key_word_intersection(text, 'project_title')

In [64]:
text.sample(10)

Unnamed: 0,project_title,summary
253,"SACOG REGION - IN SACRAMENTO, SUTTER, YOLO AND YUBA COUNTIES CONDUCT NECESSARY PLANNING, STATE AND FEDERAL PROGRAMMING, AND MONITORING ACTIVITIES, IN","[PLANNING, STATE]"
178,"JENSEN AVE. FROM ELM AVENUE TO TEMPERANCE AVENUE INSTALL ITS ADAPTIVE SYSTEM, UPGRADE DETECTION AND SYNCHRONIZE CORRIDOR. (TC)","[INSTALL, UPGRADE]"
189,"MATTOLE ROAD OVER MATTOLE RIVER (HONEYDEW), EAST OF WILDER RIDGE ROAD AND.MATTOLE ROAD JUNCTION BRIDGE REPLACEMENT (BR # 04C0055)",[BRIDGE]
255,SALEM STREET AT LITTLE CHICO CREEK BRIDGE BR. # 12C-336 ~0.1 M NORTH OF 10TH ST BRIDGE REHABILITATION (TC),"[BRIDGE, BRIDGE, REHABILITATION]"
6,3/8 MI WEST OF PARAMOUNT BLVDWASHINGTON BLVD OVER RIO HONDO.LOCAL BRIDGE # 53C0471 BRIDGE REPLACEMENT (DEMOLISH THE EXISTING 6 LANNE SUPERSTRUCTURE,"[BRIDGE, BRIDGE]"
52,"BRIDGE NO. 28C0331, BEAR CREEK RD, OVER SAN PABLO CREEK, 0.2 MI EAST OF CAMINO PABLO SEISMIC RETROFIT","[BRIDGE, SEISMIC, RETROFIT]"
247,"REQUA ROAD OVER HUNTER CREEK, 0.1 MI. WEST OF SR-101 REPLACE EXISTING 2-LANE BRIDGE (TC)","[REPLACE, BRIDGE]"
312,VARIOUS LOCATIONS THROUGHOUT THE WEST SIDE OF THE CITY. REPAIR EXISTING DAMAGED GUARDRAI,"[CITY, REPAIR]"
289,"THE INTERSECTIONS OF MILITARY WEST AND WEST 5TH ST. AND HASTINGS DR. AND LONDON DR. PEDESTRIAN CROSSING IMPROVEMENTS, INCLUDING: ADA RAMPS RRFB SYSTE","[INTERSECTIONS, PEDESTRIAN]"
170,"INTERSECTIONS OF 65TH ST EXPWY AT 21ST AVE, 65TH ST EXPWY AT FRUITRIDGE RD, ARDEN WAY AT HERITAGE LN, ARDEN WAY AT CHALLENGE WAY, HOWE AVE. AT SWARTH",[INTERSECTIONS]


In [65]:
text>>filter(_.project_title.str.contains('COMPLETE STREET'))

Unnamed: 0,project_title,summary
23,"ARDEN WAY FROM ETHAN WAY TO MORSE AVENUE. ARDEN WAY, FROM FULTON AVENUE TO MORSE AVENUE COMPLETE STREET IMPROVEMENTS INCLUDING SEPARATED SIDEWALK, C",[SIDEWALK]
29,"AVE R BETWEEN SIERRA HIGHWAY AND 25TH STREET. AVE R COMPLETE STREET WITH SIDEWALKS GAP CLOSURES, BIKE LANES, ADA RAMPS AND ENHANCED CROSSWALKS","[BIKE, LANES]"
99,EL CAMINO REAL (SR82) FROM ARROYO TO KAISER WAY COMPLETE STREET IMPROVEMENTS,[]
129,HEALDSBURG AVENUE BETWEEN POWELL AVENUE AND PASSALAQUA ROAD COMPLETE STREET IMPROVEMENTS,[]
290,THE TOWNSITE DRIVE COMPLETE STREETS PROJECT IS LOCATED IN THE CITY OF VISTA ALONG THE SEGMENT OF NORTH DRIVE FROM JUST NORTHWEST OF THE INTERSECTION,"[CITY, INTERSECTION]"


In [66]:

text>>filter(_.project_title.str.contains('BRIDGE REPLACEMENT'))

Unnamed: 0,project_title,summary
3,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),"[STATE, BRIDGE]"
6,3/8 MI WEST OF PARAMOUNT BLVDWASHINGTON BLVD OVER RIO HONDO.LOCAL BRIDGE # 53C0471 BRIDGE REPLACEMENT (DEMOLISH THE EXISTING 6 LANNE SUPERSTRUCTURE,"[BRIDGE, BRIDGE]"
10,ALAMITOS RD. BRIDGE @ ALAMITOS CK (37C0159) BRIDGE REPLACEMENT/SEISMIC RET (TC),"[BRIDGE, BRIDGE]"
24,ARROYA AVENUE OVER WEST DELTA CANAL (BRIDGE 39C0275) BRIDGE REPLACEMENT (TC),"[BRIDGE, BRIDGE]"
25,"ASH CREEK ROAD AT SACRAMENTO RIVER OVERFLOW, BRIDGE 06C0233 BRIDGE REPLACEMENT","[BRIDGE, BRIDGE]"
41,BON AIR BRIDGE (BON AIR RD OVER CORTE MADERA CREEK). BR.# 27C0028 BRIDGE REPLACEMENT,"[BRIDGE, BRIDGE]"
53,BUCK AVENUE OVER ALAMO CREEKBR NO. 23C0011 BRIDGE REPLACEMENT,[BRIDGE]
54,CANYON RD @ ACID CANAL. BR. # 06C0307 BRIDGE REPLACEMENT,[BRIDGE]
55,CASSEL FALL RIVER ROAD BRIDGE AT PIT RIVER. BR. # 06C0039 BRIDGE REPLACEMENT,"[BRIDGE, BRIDGE]"
68,"CR 40 OVER CACHE CREEK, 0.12 MILES SOUTH OF SR 16 BRIDGE REPLACEMENT. REPLACE EXISTING 1 LANE BRIDGE WITH A NEW 1 LANE BRIDGE. BR#22C0091 (TC)","[BRIDGE, REPLACE, BRIDGE, NEW, BRIDGE]"


In [67]:
text>>filter(_.project_title.str.contains('WIDEN'))

Unnamed: 0,project_title,summary
81,"DOWNTOWN LOS ANGELES, BROADWAY BETWEEN 4TH AND 6TH STREETS PEDESTRIAN SAFETY IMPROVEMENTS INCLUDING CURB EXTENSIONS, WIDENED SIDEWALK, CROSSWALK AND","[PEDESTRIAN, SAFETY, SIDEWALK]"
104,"FIRST STREET BETWEEN FLOWER ST AND STANDARD AVE WIDEN EXISTING SIDEWALKS BY 3FT, NARROW VEHICLE LANES, AND CONSTRUCT ADA IMPROVEMENTS ON SIDEWALKS AN","[WIDEN, LANES, CONSTRUCT]"
156,"IN STOCKTON, PARALLEL TO MARCH LANE IN THE EAST BAY MUD CORRIDOR BETWEEN BROOKSIDE ROAD AND HILLSBORO WAY. RECONSTRUCT, WIDEN AND IMPROVE EXISTING PA",[WIDEN]
158,"IN THE CITY OF GALT ON KOST ROAD AT UNION PACIFIC RAILROAD CROSSING, WEST OF JOY DR., EAST OF MARIA WAY. WIDEN 400 FEET OF KOST RD. ON EACH SIDE OF T","[CITY, WIDEN]"
181,LA PAZ ROAD: MUIRLANDS BLVD. TO CRISANTA DR. WIDEN TWO OVERHEADS OVER BNSF,[WIDEN]
194,MILLERTON ROAD FROM FRIANT ROAD TO MARINA DRIVE WIDEN ROADWAY FROM 2LU TO 4LD,[WIDEN]
204,"NEES AVENUE FROM MINNEWAWA AVENUE TO CLOVIS AVENUE ROAD WIDENING AND RECONSTRUCTION, INSTALLATION OF CURB AND GUTTERS, SIDEWALK, BICYCLE LANES, MODIF","[SIDEWALK, LANES]"
220,ON OLD OREGON TRAIL BETWEEN PASO ROBLES AVENUE AND BEAR MOUNTAIN ROAD WIDEN SHOULDERS AND UPGRADE DRAINAGE (TC),"[WIDEN, UPGRADE]"
233,PARK ROAD FROM 250 FT SOUTH OF OAK ROAD TO BAYSHORE ROAD REHAB./RESURFACE ROADWAY AND WIDEN ROAD TO INSTALL ASPHALT CONCRETE PAVED CLASS II/V BICYC,"[WIDEN, INSTALL, CLASS]"
241,"POWER INN ROAD FROM ELSIE AVENUE TO ABOUT 400 FEET NORTH OF MACFADDEN DRIVE. INSTALL CURB, GUTTER, SIDEWALK INFILL AND CURB RAMPS; WIDEN SUBSTANDARD","[INSTALL, SIDEWALK, WIDEN]"


In [68]:
type(text['summary'])

pandas.core.series.Series

### using if statement

In [69]:
title.sample()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code
59,Gardena,HIGHWAY SAFETY IMP PROG IIJA,"CITY OF GARDENA, VERMONT AVENUE JUST NORTH OF 133RD STREET. INSTALL A PEDESTRIAN CROSSWALK WITH A PEDESTRIAN HYBRID BEACON (PHB) OR HIGH-INTENSITY AC",YS30//YS30


In [70]:
type_list = ['BRIDGE REPLACEMENT', 'COMPLETE STREET', 'VIDEO DETECTION EQUIPMENT', 'SYNCHRONIZE CORRIDOR', 'ROADWAY REALIGNMENTS']

In [71]:
text>>filter(
            _.project_title.str.contains('UPGRADE')| _.project_title.str.contains('IMPROVE')
            )

Unnamed: 0,project_title,summary
0,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,"[TRAFFIC, SIGNAL, INTERSECTIONS, SIGNAGE]"
4,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,"[INTERSECTIONS, SIGNAL]"
5,21 LOCATIONS IN CITY OF BENICIA UPGRADE EXISTING GUARDRAILS AND END TREATMENTS.,"[CITY, UPGRADE]"
20,AMERICAN AVENUE FROM MADERA AVENUE TO PLACER AVENUE SHOULDER IMPROVEMENTS,[]
22,"APPIAN WAY AND MARLESTA ROAD INTERSECTION SAFETY IMPROVEMENTS: INSTALLATION OF A TRAFFIC SIGNAL AND TRAFFIC SIGNAGE, INCLUDING ADVANCED WARNING SIGNS","[INTERSECTION, SAFETY, TRAFFIC, SIGNAL, TRAFFIC, SIGNAGE]"
23,"ARDEN WAY FROM ETHAN WAY TO MORSE AVENUE. ARDEN WAY, FROM FULTON AVENUE TO MORSE AVENUE COMPLETE STREET IMPROVEMENTS INCLUDING SEPARATED SIDEWALK, C",[SIDEWALK]
26,AT MAX FOSTER SPORTS COMPLEX AREA IN LIVINGSTON CONSTRUCT MULTI-USE PATH IMPROVEMENTS,[CONSTRUCT]
81,"DOWNTOWN LOS ANGELES, BROADWAY BETWEEN 4TH AND 6TH STREETS PEDESTRIAN SAFETY IMPROVEMENTS INCLUDING CURB EXTENSIONS, WIDENED SIDEWALK, CROSSWALK AND","[PEDESTRIAN, SAFETY, SIDEWALK]"
85,E. 20TH STREET FROM THE MALL TO THE END OF BUSINESS LANE (ADJACENT TO SR99) BIKEWAY IMPROVEMENTS,[]
99,EL CAMINO REAL (SR82) FROM ARROYO TO KAISER WAY COMPLETE STREET IMPROVEMENTS,[]


In [72]:
text.sample(10)

Unnamed: 0,project_title,summary
316,VARIOUS ROAD SEGMENTS THROUGHOUT CALAVERAS COUNTY (EXCLUDING STATE ROUTES AND THE CITY OF ANGELS CAMP) CONDUCT A ROADWAY SAFETY SIGNAGE AUDIT (RSSA),"[COUNTY, STATE, CITY, SAFETY, SIGNAGE]"
228,ORWOOD BRIDGE (28C-0024) AT INDIAN SLOUGH BRIDGE REPLACEMENT (TC),"[BRIDGE, BRIDGE]"
2,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.","[CITY, INSTALL, SIGNAL, SIGNALS]"
276,SPERRY AVENUE FROM AMERICAN EAGLE AVENUE TO WARD AVENUE. ROAD REHABILITATION / RECONSTRUCTION WITH DIG OUTS AS NEEDED,[REHABILITATION]
289,"THE INTERSECTIONS OF MILITARY WEST AND WEST 5TH ST. AND HASTINGS DR. AND LONDON DR. PEDESTRIAN CROSSING IMPROVEMENTS, INCLUDING: ADA RAMPS RRFB SYSTE","[INTERSECTIONS, PEDESTRIAN]"
218,"ON MCKEE ROAD BETWEEN ROUTE 101 AND TOYON AVENUE IMPLEMENT SAFETY IMPROVEMENTS INCLUDING MEDIAN ISLANDS, ADA CURB RAMP, SPEED RADAR SIGN, ENHANCED CR",[SAFETY]
4,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,"[INTERSECTIONS, SIGNAL]"
329,WOODLAKE AT THE INTERSECTION OF SIERRA AVE AND CASTLE ROCK STREET NEW ROUNDABOUT (TC),"[INTERSECTION, NEW]"
300,VARIOUS LOCAL STREETS WITHIN THE CITY OF LOS ANGELES TO RE-SURFACE LOCAL STREETS OF APPROXIMATELY 45 MILE IN TOTAL LENGTH AND CONSTRUCT ACCESS RAM,"[CITY, CONSTRUCT]"
103,FIFTEEN (15) SIGNALIZED INTERSECTIONS THROUGHOUT THE CITY INSTALL ADVANCED DILEMMA ZONE DETECTION,"[INTERSECTIONS, CITY, INSTALL]"


### Function for getting project types using np.where
(str format)

In [74]:
#using subset of full data
proj_sub = proj_all>>select(_.implementing_agency,
                            _.program_code_description,
                            _.project_title,
                            _.program_code,
                            _.improvement_type_description)

In [75]:
proj_sub.sample()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description
37,Metropolitan Transportation Commission,STBG-URBANIZED >200K IIJA,"BAY AREA REGIONAL: REGIONAL PLANNING ACTIVITIES AND PLANNING, PROGRAMMING AND MONITORING (PPM). PRIOR YEAR FUNDING PROGRAMMED ON REG170001 (TC)",Y230//Y230//Y230,Other//Other//Other


In [None]:
#sample of function
((utils.add_description(proj_sub, 'project_title'))).head(10)

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other
0,Hawthorne,HIGHWAY SAFETY IMP PROG IIJA,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,YS30//YS30,Construction Engineering//Safety,Upgrade,Traffic Signals,Other Projects
1,Yucaipa,NATIONAL HIGHWAY PERF IIJA,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",Y001,Other,Replace,Bridge,Other Projects
2,Fairfield,HIGHWAY SAFETY IMP PROG IIJA,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",YS30//YS30,Construction Engineering//Safety,Install,Signals,Other Projects
3,Lake County,STBG IIJA OFF-SYSTEM BRIDGE,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),Y233//Y233//Y233//Y233,Bridge Replacement - No Added Capacity//Preliminary Engineering//Right of Way//Construction Engineering,Replace,Bridge,Other Projects
4,Vacaville,HIGHWAY SAFETY IMP PROG IIJA,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,YS30//YS30,Construction Engineering//Safety,Improve,Signals,Other Projects
5,Benicia,HIGHWAY SAFETY IMP PROG IIJA,21 LOCATIONS IN CITY OF BENICIA UPGRADE EXISTING GUARDRAILS AND END TREATMENTS.,YS30//YS30,Construction Engineering//Safety,Upgrade,Guardrails,Other Projects
6,Pico Rivera,NATIONAL HIGHWAY PERF IIJA,3/8 MI WEST OF PARAMOUNT BLVDWASHINGTON BLVD OVER RIO HONDO.LOCAL BRIDGE # 53C0471 BRIDGE REPLACEMENT (DEMOLISH THE EXISTING 6 LANNE SUPERSTRUCTURE,Y001,Preliminary Engineering,Replace,Bridge,Other Projects
7,Marin County,STBG-URBANIZED >200K IIJA,"4 AT TERNERS DRIVE @ DRIVEWAY ENTRANCES TO MULTI-FAMILY HOUSING CLOSEST TO DONAHUE, 4 AT TERNERS DRIVE @ TERRACE WAY, 4 AT TERNERS DRIVE @ TERRACE DR",Y230,Preliminary Engineering,,Project,Other Projects
8,Marin County,HIGHWAY SAFETY IMP PROG IIJA,"59 TRAFFIC SIGNAL COUNTYWIDE. JURISDICTIONS INCLUDE CORTE MADERA, FAIRFAX, MILL VALLEY, NOVATO, SAN ANSELMO, SAN RAFAEL, SAUSALITO, MARIN COUNTY, LA",YS30//YS30,Construction Engineering//Safety,,Traffic Signals,Other Projects
9,Los Banos,CONGESTION MITIGATION IIJA,5-MILES STRETCH OF SR-152 SYNCHRONIZATION OF 14 TRAFFIC SIGNALS ON A FIVE MILE STRETCH OF SR-152 WITH INSTALLATION OF CCTV CAMERAS. (TC),Y400//Y400,Construction Engineering//Traffic Management/Engineering - HOV,Install,Traffic Signals,Other Projects


In [78]:
#how many entries with no tags
len((utils.add_description(proj_sub, 'project_title'))>>filter(_.project_type==('Project')))

97

In [79]:
#entries with no tag
((utils.add_description(proj_sub, 'project_title'))>>filter(_.project_type==('Project'))>>arrange(-_.project_method)).head(40)

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other
34,Colton,NATIONAL HIGHWAY PERF IIJA,"BARTON ROAD OVERHEAD AT UNION PACIFIC RAILROAD, 0.25 MILE WEST OF I-215, BR. NO. 54C-0379 REPLACE OVERHEAD WITH TWO-LANE ROAD ACROSS DISCONTINUED RAI",Y001//Y001,Preliminary Engineering//Right of Way,Replace,Project,Other Projects
269,Los Angeles,NATIONAL HIGHWAY PERF IIJA,"SIXTH STREET VIADUCT OVER LA RIVER AND EAST SANTA ANNA FREEWAY, 53C1880 REPLACE STRUCT DEFICIENT VIADUCT WITH NEW (TC)",Y001,Right of Way,Replace,Project,Other Projects
281,Orange County Transportation Authority,STBG-URBANIZED >200K IIJA,STATE ROUTE 55 FROM INTERSTATE 405 TO INTERSTATE 5 ADD ONE MIXED-FLOW AND HIGH-OCCUPANCY LANE IN EACH DIRECTION AND REPAIR CHOKEPOINTS (TC),Y230//Y230//Y400,Right of Way//Utilities//Right of Way,Repair,Project,Other Projects
93,Fresno,TRANSP ALTERNATIVES FLEX IIJA,"EASTBOUND MCKINLEY FROM MILLBROOK AVENUE TO CLOVIS AVENUE ALONG THE NORTH BANK OF THE MILL DITCH CANAL. PAVED PATH, LIGHTING, BENCHES,FENCING, IRRIGA",Y300//Y300,Construction Engineering//Facilities for Pedestrians and Bicycles,Pave,Project,Other Projects
213,Oakland,TRANS ALTERNATIVES >200K IIJA,"OAKLAND - 14TH STREET SAFE ROUTES IN THE CITY. ON 14TH ST BETWEEN BRUSH ST AND OAK ST REDUCE TRAVEL LANES FROM 4 TO 2, ADD PAVED CLASS IV PROTECTED B",Y301,Facilities for Pedestrians and Bicycles,Pave,Project,Other Projects
19,Sacramento,CONGESTION MITIGATION IIJA,ALONG UNUSED RAIL CORRIDOR NEAR FREEPORT BLVD FROM SUTTERVILLE ROAD TO SOUTH OF MEADOWVIEW/POCKET ROAD (4.5 MILES) RAILS TO TRAILS PROJECT ; NEW CLAS,Y400,Facilities for Pedestrians and Bicycles,New,Project,Other Projects
15,Fresno County,TRANS ALTERNATIVES >200K IIJA,"ALONG GROVE AVENUE FROM PROSPECT AVENUE TO VALENTINE AVENUE, AND ALONG VALENTINE AVENUE FROM GROVE AVENUE TO NORTH AVENUE INSTALL AN ASPHALT CONCRETE",Y301//Y301,Construction Engineering//Facilities for Pedestrians and Bicycles,Install,Project,Other Projects
17,Stockton,HIGHWAY SAFETY IMP PROG IIJA,ALONG PACIFIC AVE. FROM CALAVERAS RIVER TRAIL TO WEST HAMMER LANE. INSTALL RAISED MEDIAN CURB,YS30,Safety,Install,Project,Other Projects
96,La Puente,HIGHWAY SAFETY IMP PROG IIJA,"EIGHT (8) CROSSWALK LOCATIONS THROUGHOUT THE CITY OF LA PUENTE. INSTALL RECTANGULAR RAPID FLASHING BEACONS (RRFBS), CURB EXTENSIONS, ADVANCED PAVEMEN",YS30,Safety,Install,Project,Other Projects
108,Sacramento,STBG-URBANIZED >200K IIJA,FLORIN ROAD BETWEEN GREENHAVEN DRIVE AND LUTHER DRIVE. CONVERT FROM PEDESTAL MOUNTED TO MAST ARMS AT 7 INTERSECTIONS; INSTALL ADVANCED DETECTION AT F,Y230//Y230//Y240//YS30//YS30,4R - No Added Capacity//Construction Engineering//4R - No Added Capacity//Construction Engineering//Safety,Install,Project,Other Projects


In [80]:
#checking remaining common words for no project tags
((utils.get_list_of_words(((utils.add_description(proj_sub, 'project_title'))>>filter(_.project_type=='Project')), "project_title")).value_counts()).head(60)

[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


avenue           51
st               44
ave              39
road             36
city             23
blvd             20
street           18
east             17
construct        17
north            15
rd               15
county           14
intersections    13
south            13
drive            12
along            12
west             11
san              11
area             10
way              10
dr                9
lane              9
tc                9
path              8
bay               8
river             7
parkway           7
regional          6
install           6
redwood           6
trail             6
streets           6
creek             6
widen             6
mile              6
4                 6
state             6
including         6
project           6
2                 6
corridor          6
park              6
class             6
multiuse          5
interstate        5
pacific           5
sacramento        5
alameda           5
la                5
route             5


In [82]:
#sample of some descriptions that got cut off
((utils.add_description(proj_sub, 'project_title')))>>filter(_.project_type=='Project')>>filter(_.project_title.str.contains('AVENUE'))

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other
15,Fresno County,TRANS ALTERNATIVES >200K IIJA,"ALONG GROVE AVENUE FROM PROSPECT AVENUE TO VALENTINE AVENUE, AND ALONG VALENTINE AVENUE FROM GROVE AVENUE TO NORTH AVENUE INSTALL AN ASPHALT CONCRETE",Y301//Y301,Construction Engineering//Facilities for Pedestrians and Bicycles,Install,Project,Other Projects
32,Palmdale,HIGHWAY SAFETY IMP PROG IIJA,AVENUE S-8 AND 40TH STREET EAST. CONVERT AN EXISTING FOUR WAY STOP TO A ROUDABOUT AT THE INTERSECTION OF AVENUE S-8 AND 40TH STREET EAST.. ORIGIO,YS30//YS30,Construction Engineering//Safety,,Project,Other Projects
36,Rialto,HIGHWAY SAFETY IMP PROG IIJA,BASELINE ROAD - WILLOW AVENUE TO MERIDIAN AVENUE CONSTRUCT A 12' WIDE RAISED CENTER MEDIAN AND ADA RAMP.,YS30//YS30,Construction Engineering//Safety,Construct,Project,Other Projects
40,Belmont,STBG-URBANIZED >200K IIJA,"BELMONT: CHULA VISTA FROM ALAMEDA DE LAS PULGAS TO RALSTON AVE, 6TH AVENUE FROM RALSTON AVENUE TO HILL STREET, 6TH AVENUE FROM EMMETT AVENUE TO HARBO",Y230,4R - Restoration & Rehabilitation,,Project,Other Projects
67,Sacramento County,HIGHWAY SAFETY IMP PROG IIJA,"COYLE AVENUE FROM WOODLEIGH TO DEWEY, HILLSDALE BOULEVARD FROM FRIZELL AVENUE TO MCCLOUD DRIVE, AND WATT AVENUE FROM ELKHORN BOULEVARD TO 700' NORTH",YS30//YS30,Construction Engineering//Safety,,Project,Other Projects
87,Morgan Hill,STBG-URBANIZED >200K IIJA,EAST DUNNE AVENUE BETWEEN LOWER THOMAS GRADE AND THE EASTERN CITY LIMIT. FEDERAL PARTICIPATING LOCATION SEGMENTS: 1) LOWER THOMAS GRADE TO FLAMING OA,Y230//Y230,4R - Restoration & Rehabilitation//Construction Engineering,,Project,Other Projects
92,Fairfield,PROJ TO REDUCE PM 2.5 EMI IIJA,EAST TABOR AVENUE ( BETWEEN DOVER AVENUE AND CLAY BANK ROAD) AND SUNSET AVENUE ( BETWEEN EAST TABOR AVENUE AND TRAVIS BOULEVARD) IMPLEMENT PEDESTRIA,Y003//Y003//Y240,Construction Engineering//Facilities for Pedestrians and Bicycles//Facilities for Pedestrians and Bicycles,,Project,Other Projects
93,Fresno,TRANSP ALTERNATIVES FLEX IIJA,"EASTBOUND MCKINLEY FROM MILLBROOK AVENUE TO CLOVIS AVENUE ALONG THE NORTH BANK OF THE MILL DITCH CANAL. PAVED PATH, LIGHTING, BENCHES,FENCING, IRRIGA",Y300//Y300,Construction Engineering//Facilities for Pedestrians and Bicycles,Pave,Project,Other Projects
122,Fresno,HIGHWAY SAFETY IMP PROG IIJA,"FRESNO STREET AND THOMAS AVENUE INTERSECTIONS, FRESNO STREET AND SAN JOSE AVENUE INTERSECTION, FRESNO AND R STREETS INTERSECTION, FRESNO STREET AND C",YS30//YS30,Construction Engineering//Safety,,Project,Other Projects
130,Santa Rosa,STBG-URBANIZED >200K IIJA,"HEARN AVENUE BETWEEN STONY POINT ROAD, DUTTON AVENUE AND RANGE AVENUE BETWEEN GUERNEVILLE ROAD AND JENNINGS AVENUE, AND RANGE AVENUE BETWEEN RUSSELL",Y230,4R - Maintenance Resurfacing,,Project,Other Projects


In [83]:
## sample of what third colum, `other` captures
((utils.add_description(proj_sub, 'project_title')))>>filter(_.other!='Other Projects')


Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other
27,El Dorado County,STBG-URBANIZED >200K IIJA,"AT MISSOURI FLAT RD 0.8 MILES SOUTH OF GOLDEN CENTER DRIVE CONSTRUCT BIKE TRAIL CONNECTION, EXTEND CURB, GUTTER, AND SIDEWALK. TC",Y230//Y230,4R - No Added Capacity//Construction Engineering,Construct,Sidewalk,Multiple Road
29,Palmdale,STBG-URBANIZED >200K IIJA,"AVE R BETWEEN SIERRA HIGHWAY AND 25TH STREET. AVE R COMPLETE STREET WITH SIDEWALKS GAP CLOSURES, BIKE LANES, ADA RAMPS AND ENHANCED CROSSWALKS",Y230,New Construction Roadway,,Complete Streets,Multiple Road
39,Bellflower,HIGHWAY SAFETY IMP PROG IIJA,"BELLFLOWER BOULEVARD FROM ARTESIA BOULEVARD TO SOUTH CITY LIMITS (350' SOUTH OF ROSE STREET). INSTALLING CLASS II BIKE LANES, RAISED MEDIAN, BULB-OUT",YS30//YS30,Construction Engineering//Safety,Install,Bike Lanes,Multiple Road
57,Alameda,TRANS ALTERNATIVES >200K IIJA,"CENTRAL BETWEEN PACIFIC AVENUE/MAIN STREET AND SHERMAN STREET/ENCINAL AVENUE. REDUCE ROADWAY FROM 4 TO 3 LANES FOR BIKE LANES AND SEPARATED BIKEWAY,",Y301,Preliminary Engineering,,Bike Lanes,Multiple Road
81,Los Angeles,TRANSP ALTERNATIVES FLEX IIJA,"DOWNTOWN LOS ANGELES, BROADWAY BETWEEN 4TH AND 6TH STREETS PEDESTRIAN SAFETY IMPROVEMENTS INCLUDING CURB EXTENSIONS, WIDENED SIDEWALK, CROSSWALK AND",Y300//Y300,Construction Engineering//Safety,Improve,Sidewalk,Multiple Road
85,Chico,CONGESTION MITIGATION IIJA,E. 20TH STREET FROM THE MALL TO THE END OF BUSINESS LANE (ADJACENT TO SR99) BIKEWAY IMPROVEMENTS,Y400,Construction Engineering,Improve,Bike Lanes,Multiple Road
95,Fresno,TRANS ALTERNATIVES >200K IIJA,"EASTSIDE OF BARTON AVENUE FROM CHURCH TO FLORENCE AND FLORENCE AVENUE FROM BARTON TO APPROXIMATELY 105 FT WEST OF JACKSON. INSTALL SIDEWALK, CURB RAM",Y301//Y301,Construction Engineering//Facilities for Pedestrians and Bicycles,Install,Sidewalk,Multiple Road
119,Fremont,SURFAC TRNSP BLK GRTS-FLX IIJA,FREMONT BLVD. AND WALNUT AVE. INTERSECTION CONNECT TO EXISTING ELEVATED/SEPARATED CLASS IV BIKEWAYS AND SIDEWALKS,Y240//Y240,Construction Engineering//Safety,,Sidewalk,Multiple Road
120,Metropolitan Transportation Commission,STBG-URBANIZED >200K IIJA,"FREMONT, RICHMOND, AND MARIN AND SONOMA COUNTIES, ALONG THE SMART CORRIDOR. BIKE SHARE CAPITAL PROGRAM (TC)",Y230,Other,,Bike Share Program,Multiple Road
142,El Dorado County,CONGESTION MITIGATION IIJA,"IN EL DORADO COUNTY, IN THE COMMUNITY OF POLLOCK PINES, PONY EXPRESS TRAIL, FROM SANDERS DRIVE TO SLY PARK ROAD. INSTALL 1.7 MILES OF CLASS 2 BIKE LA",Y400//Y400,Preliminary Engineering//Right of Way,Install,Bike Lanes,Multiple Road


## Combine for Public friendly title

In [85]:
title_new = (utils.add_description(proj_sub, 'project_title'))

In [86]:
title_new.head()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other
0,Hawthorne,HIGHWAY SAFETY IMP PROG IIJA,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,YS30//YS30,Construction Engineering//Safety,Upgrade,Traffic Signals,Other Projects
1,Yucaipa,NATIONAL HIGHWAY PERF IIJA,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",Y001,Other,Replace,Bridge,Other Projects
2,Fairfield,HIGHWAY SAFETY IMP PROG IIJA,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",YS30//YS30,Construction Engineering//Safety,Install,Signals,Other Projects
3,Lake County,STBG IIJA OFF-SYSTEM BRIDGE,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),Y233//Y233//Y233//Y233,Bridge Replacement - No Added Capacity//Preliminary Engineering//Right of Way//Construction Engineering,Replace,Bridge,Other Projects
4,Vacaville,HIGHWAY SAFETY IMP PROG IIJA,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,YS30//YS30,Construction Engineering//Safety,Improve,Signals,Other Projects


In [87]:
#replacing with function
# title_new['project_name_new'] = title_new["project_method"] + " " + title_new["project_type"] + " in " + title_new["implementing_agency"]

In [88]:
title_new = utils.add_new_title(title_new, 'project_method', 'project_type', 'implementing_agency')

In [89]:
title_new.head()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other,project_name_new
0,Hawthorne,HIGHWAY SAFETY IMP PROG IIJA,120TH STREET FROM PRAIRIE AVENUE TO FELTON AVENUE. TRAFFIC SIGNAL UPGRADES AT NINE SIGNALIZED INTERSECTIONS AND IMPROVE CROSSINGS AND SIGNAGE.,YS30//YS30,Construction Engineering//Safety,Upgrade,Traffic Signals,Other Projects,Upgrade Traffic Signals in Hawthorne
1,Yucaipa,NATIONAL HIGHWAY PERF IIJA,"13TH STREET OVER WILSON CREEK FROM OAK GLEN ROAD TO KENTUCKY STREET, LWC 00L0017 REPLACE LOW WATER CROSSING WITH 2-LANE BRIDGE",Y001,Other,Replace,Bridge,Other Projects,Replace Bridge in Yucaipa
2,Fairfield,HIGHWAY SAFETY IMP PROG IIJA,"16 LOCATIONS IN THE CITY OF FAIRFIELD INSTALL ADAPTIVE SIGNAL TIMING AND ADVANCED DILEMMA-ZONE DETECTION, AND ENHANCE THE VISIBILITY OF SIGNALS.",YS30//YS30,Construction Engineering//Safety,Install,Signals,Other Projects,Install Signals in Fairfield
3,Lake County,STBG IIJA OFF-SYSTEM BRIDGE,2.2 MILES NORTH OF STATE ROUTE 20 ON WITTER SPRINGS ROAD OVER COOPER CREEK. BR.# 14C0119 BRIDGE REPLACEMENT (TC),Y233//Y233//Y233//Y233,Bridge Replacement - No Added Capacity//Preliminary Engineering//Right of Way//Construction Engineering,Replace,Bridge,Other Projects,Replace Bridge in Lake County
4,Vacaville,HIGHWAY SAFETY IMP PROG IIJA,21 INTERSECTIONS; ON ALAMO DRIVE FROM MERCHANT STREET TO NUT TREE ROAD AND ON PEABODY ROAD FROM ELMIRA ROAD TO FOXBORO PARKWAY IMPROVE SIGNAL HARDWAR,YS30//YS30,Construction Engineering//Safety,Improve,Signals,Other Projects,Improve Signals in Vacaville


### Export Sample Data with all columns

In [91]:
test_df = (utils.add_description(proj_all, 'project_title'))

In [92]:
test_df = utils.add_new_title(test_df, 'project_method', 'project_type', 'implementing_agency')

In [93]:
test_df = utils.title_column_names(test_df)

#### Export progress

In [94]:
# test_df.to_csv(f"{GCS_FILE_PATH}/FMIS_projects_wip2.csv")

### Using another column `improvement_type_description`

In [95]:
def update_no_matched(df, flag_col, desc_col):
    """
    function to itreate over projects that did not match the first time
    using an existing project's short description of project type. 
    """
    
    if df[flag_col] == "Project":
        return 
    
    
    return df

In [96]:
no_match = ((utils.add_description(proj_sub, 'project_title'))>>filter(_.project_type==('Project')))

In [97]:
## using improvement type description
clean_type_list_df = utils.get_list_of_words(no_match, "improvement_type_description") 


[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [98]:
(clean_type_list_df.value_counts()).head(30)

construction                     34
engineering                      28
4r                               23
pedestrians                      20
bicycles                         18
maintenance                      14
engineeringsafety                13
preliminary                      12
restoration                      12
engineeringfacilities            11
added                             9
resurfacing4r                     9
safety                            8
facilities                        8
engineeringsafetyconstruction     8
rehabilitationconstruction        7
rehabilitation                    5
capacity                          5
roadwayconstruction               4
capacityconstruction              4
new                               4
engineeringpreliminary            3
resurfacingconstruction           3
roadway                           3
engineeringnew                    3
way                               3
right                             3
otherotherother             

In [99]:
no_match.head()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other,project_name_new
7,Marin County,STBG-URBANIZED >200K IIJA,"4 AT TERNERS DRIVE @ DRIVEWAY ENTRANCES TO MULTI-FAMILY HOUSING CLOSEST TO DONAHUE, 4 AT TERNERS DRIVE @ TERRACE WAY, 4 AT TERNERS DRIVE @ TERRACE DR",Y230,Preliminary Engineering,,Project,Other Projects,Project in Marin County
15,Fresno County,TRANS ALTERNATIVES >200K IIJA,"ALONG GROVE AVENUE FROM PROSPECT AVENUE TO VALENTINE AVENUE, AND ALONG VALENTINE AVENUE FROM GROVE AVENUE TO NORTH AVENUE INSTALL AN ASPHALT CONCRETE",Y301//Y301,Construction Engineering//Facilities for Pedestrians and Bicycles,Install,Project,Other Projects,Install Project in Fresno County
16,Santa Ana,TRANS ALTERNATIVES >200K IIJA,ALONG NEIGHBORHOOD STREETS ADJACENT TO DAVIS ELEMENTARY SCHOOL FROM 14TH STREET TO 15TH STREET. CONSTRUCT BULB-OUT AT CROSSINGS NEAR DAVIS ELEMENTARY,Y301,Construction Engineering,Construct,Project,Other Projects,Construct Project in Santa Ana
17,Stockton,HIGHWAY SAFETY IMP PROG IIJA,ALONG PACIFIC AVE. FROM CALAVERAS RIVER TRAIL TO WEST HAMMER LANE. INSTALL RAISED MEDIAN CURB,YS30,Safety,Install,Project,Other Projects,Install Project in Stockton
18,Caltrans,STBG-URBANIZED >200K IIJA,"ALONG THE I-880 CORRIDOR SF BAY AREA: REGIONWIDE: IMPLEMENT A COLLECTIVE APPROACH TO FREEWAY OPERATIONS AND MANAGEMENT, INCLUDING COMMUNICATIONS NETW",Y230,Preliminary Engineering,,Project,Other Projects,Project in Caltrans


In [100]:
no_match>>count(_.improvement_type_description)>>filter(_.n>1)>>arrange(-_.n)

Unnamed: 0,improvement_type_description,n
17,Construction Engineering//Safety,10
29,Preliminary Engineering,9
12,Construction Engineering//Facilities for Pedestrians and Bicycles,8
21,Facilities for Pedestrians and Bicycles,8
35,Safety,7
9,4R - Restoration & Rehabilitation//Construction Engineering,6
25,Other,6
7,4R - Restoration & Rehabilitation,4
3,4R - Maintenance Resurfacing//Construction Engineering,3
4,4R - No Added Capacity,3


In [101]:
no_match>>filter(_.improvement_type_description.str.contains('Pedestrians and Bicycles'))

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other,project_name_new
15,Fresno County,TRANS ALTERNATIVES >200K IIJA,"ALONG GROVE AVENUE FROM PROSPECT AVENUE TO VALENTINE AVENUE, AND ALONG VALENTINE AVENUE FROM GROVE AVENUE TO NORTH AVENUE INSTALL AN ASPHALT CONCRETE",Y301//Y301,Construction Engineering//Facilities for Pedestrians and Bicycles,Install,Project,Other Projects,Install Project in Fresno County
19,Sacramento,CONGESTION MITIGATION IIJA,ALONG UNUSED RAIL CORRIDOR NEAR FREEPORT BLVD FROM SUTTERVILLE ROAD TO SOUTH OF MEADOWVIEW/POCKET ROAD (4.5 MILES) RAILS TO TRAILS PROJECT ; NEW CLAS,Y400,Facilities for Pedestrians and Bicycles,New,Project,Other Projects,New Project in Sacramento
26,Livingston,CONGESTION MITIGATION IIJA,AT MAX FOSTER SPORTS COMPLEX AREA IN LIVINGSTON CONSTRUCT MULTI-USE PATH IMPROVEMENTS,Y400//Y400,Construction Engineering//Facilities for Pedestrians and Bicycles,Construct,Project,Other Projects,Construct Project in Livingston
65,Bakersfield,CONGESTION MITIGATION IIJA,CONSTRUCT A MULTI-USE PATH (6 MILES) ALONG THE FRIANT-KERN CANAL IN BAKERSFIELD: BOUNDED BY 7TH STANDARD ROAD AND KERN RIVER PARKWAY. CONSTRUCT CLASS,Y400,Facilities for Pedestrians and Bicycles,Construct,Project,Other Projects,Construct Project in Bakersfield
82,Los Angeles,TRANS ALTERNATIVES >200K IIJA,"DTLA ARTS DISTRICT AREA FROM 2ND STREET ON THE NORTH, 7TH STREET ON THE SOUTH, ALAMEDA STREET ON THE WEST AND THE LA RIVER ON THE EAST WITH A FOCUS O",Y301//Y301,Construction Engineering//Facilities for Pedestrians and Bicycles,,Project,Other Projects,Project in Los Angeles
90,El Dorado County,STBG-URBANIZED >200K IIJA,EAST SAN BERNARDINO AVE FROM WEST OF UPPER TRUCKEE RIVER TO TAHOE PARADISE PARK IN THE COMMUNITY OF MYERS CONSTRUCT APPROXIMATELY 0.37 MILES OF CLASS,Y230//Y230//Y400//Y400,Construction Engineering//Facilities for Pedestrians and Bicycles//Construction Engineering//Facilities for Pedestrians and Bicycles,Construct,Project,Other Projects,Construct Project in El Dorado County
92,Fairfield,PROJ TO REDUCE PM 2.5 EMI IIJA,EAST TABOR AVENUE ( BETWEEN DOVER AVENUE AND CLAY BANK ROAD) AND SUNSET AVENUE ( BETWEEN EAST TABOR AVENUE AND TRAVIS BOULEVARD) IMPLEMENT PEDESTRIA,Y003//Y003//Y240,Construction Engineering//Facilities for Pedestrians and Bicycles//Facilities for Pedestrians and Bicycles,,Project,Other Projects,Project in Fairfield
93,Fresno,TRANSP ALTERNATIVES FLEX IIJA,"EASTBOUND MCKINLEY FROM MILLBROOK AVENUE TO CLOVIS AVENUE ALONG THE NORTH BANK OF THE MILL DITCH CANAL. PAVED PATH, LIGHTING, BENCHES,FENCING, IRRIGA",Y300//Y300,Construction Engineering//Facilities for Pedestrians and Bicycles,Pave,Project,Other Projects,Pave Project in Fresno
139,Citrus Heights,CONGESTION MITIGATION IIJA,"IN CITRUS HEIGHTS, BETWEEN ARCADE CREEK PARK PRESERVE AND WACHTEL WAY. CONSTRUCT A 2.9 MILE LONG CLASS 1 MULTI-USE TRAIL. TC",Y400,Facilities for Pedestrians and Bicycles,Construct,Project,Other Projects,Construct Project in Citrus Heights
146,Livingston,PROJ TO REDUCE PM 2.5 EMI IIJA,"IN LIVINGSTON, CONSTRUCT PHASE 2 MAX FOSTER MULTIUSE PATH (ALONG DWIGHT WAY, BETWEEN NORTH TRIGGER LN & TEHAMA DR,) CONSTRUCT A CONCRETE MULTIUSE PAT",Y003//Y003,Construction Engineering//Facilities for Pedestrians and Bicycles,Construct,Project,Other Projects,Construct Project in Livingston


In [102]:
no_match['improvement_type_description'] = no_match['improvement_type_description'].str.upper()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [103]:
test = utils.add_description(no_match, 'improvement_type_description')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [104]:
test.head()

Unnamed: 0,implementing_agency,program_code_description,project_title,program_code,improvement_type_description,project_method,project_type,other,project_name_new
7,Marin County,STBG-URBANIZED >200K IIJA,"4 AT TERNERS DRIVE @ DRIVEWAY ENTRANCES TO MULTI-FAMILY HOUSING CLOSEST TO DONAHUE, 4 AT TERNERS DRIVE @ TERRACE WAY, 4 AT TERNERS DRIVE @ TERRACE DR",Y230,PRELIMINARY ENGINEERING,,Project,Other Projects,Project in Marin County
15,Fresno County,TRANS ALTERNATIVES >200K IIJA,"ALONG GROVE AVENUE FROM PROSPECT AVENUE TO VALENTINE AVENUE, AND ALONG VALENTINE AVENUE FROM GROVE AVENUE TO NORTH AVENUE INSTALL AN ASPHALT CONCRETE",Y301//Y301,CONSTRUCTION ENGINEERING//FACILITIES FOR PEDESTRIANS AND BICYCLES,Construct,Pedestrian Safety Improvements,Other Projects,Install Project in Fresno County
16,Santa Ana,TRANS ALTERNATIVES >200K IIJA,ALONG NEIGHBORHOOD STREETS ADJACENT TO DAVIS ELEMENTARY SCHOOL FROM 14TH STREET TO 15TH STREET. CONSTRUCT BULB-OUT AT CROSSINGS NEAR DAVIS ELEMENTARY,Y301,CONSTRUCTION ENGINEERING,Construct,Project,Other Projects,Construct Project in Santa Ana
17,Stockton,HIGHWAY SAFETY IMP PROG IIJA,ALONG PACIFIC AVE. FROM CALAVERAS RIVER TRAIL TO WEST HAMMER LANE. INSTALL RAISED MEDIAN CURB,YS30,SAFETY,,Project,Other Projects,Install Project in Stockton
18,Caltrans,STBG-URBANIZED >200K IIJA,"ALONG THE I-880 CORRIDOR SF BAY AREA: REGIONWIDE: IMPLEMENT A COLLECTIVE APPROACH TO FREEWAY OPERATIONS AND MANAGEMENT, INCLUDING COMMUNICATIONS NETW",Y230,PRELIMINARY ENGINEERING,,Project,Other Projects,Project in Caltrans
