# Filtering for entries with Humboldt County

Humboldt County has the most obligations in the dataset, prompting us to wonder what types of obligations are going to this county.

In [1]:
import pandas as pd
from siuba import *
import altair as alt

In [2]:
from dla_utils import _dla_utils



In [3]:
df= pd.read_parquet("gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/dla_df.parquet")

In [4]:
df.head()

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0,...,0.0,Large,0,0,1,0,0,0,0,1
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508,...,0.0,Large,0,0,0,0,0,1,0,1
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499,...,0.0,Large,0,0,0,0,0,1,0,1
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002,...,0.0,Large,1,0,0,0,0,1,0,2
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0,...,6130295.48,Large,0,0,0,1,0,0,0,1


In [5]:
df.primary_agency_name.value_counts().sort_values(ascending=False)

Humboldt County                      898
Santa Cruz County                    642
Los Angeles                          599
Los Angeles County                   540
Fresno                               522
                                    ... 
Tri Delta Transit                      1
Tustin                                 1
Antelope Valley Transit Authority      1
La Jolla Band of Luiseno Indians       1
Natomas Unified School District        1
Name: primary_agency_name, Length: 619, dtype: int64

In [6]:
df >> filter(_.primary_agency_name.str.contains("Humboldt County"))

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0,...,0.00,Large,0,0,1,0,0,0,0,1
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499,...,0.00,Large,0,0,0,0,0,1,0,1
61,Obligated,BPMP,5904(157),Humboldt County,2018-11-13,2018-12-06,2018-12-14,2018-12-14,2018-12-14,21619,...,0.00,Large,0,0,1,0,0,1,0,2
179,Obligated,ER,32L0(132),Humboldt County,2018-11-09,2018-11-19,2018-11-19,2018-11-21,2018-11-28,80597,...,0.00,Large,0,0,0,1,0,1,0,2
180,Obligated,ER,32L0(136),Humboldt County,2018-11-07,2018-11-19,2018-11-19,2018-11-21,2018-11-28,7797,...,0.00,Large,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25047,Obligated,ATPLNI,5904(152),Humboldt County,2023-08-23,2023-08-23,2023-08-23,2023-08-23,2023-08-28,0,...,0.00,Large,0,0,0,0,0,0,0,0
25049,Obligated,ACSTER,32L0(114),Humboldt County,2023-05-30,2023-07-05,2023-07-06,2023-08-16,2023-08-21,0,...,43534.50,Large,0,0,0,0,0,1,0,1
25052,Obligated,BRLS,5904(024),Humboldt County,2023-03-28,2023-06-07,2023-08-02,2023-08-02,2023-08-03,16725751,...,0.00,Large,0,0,1,0,0,1,0,2
25054,Obligated,ER,32L0(125),Humboldt County,2023-05-19,2023-05-24,2023-05-25,2023-08-16,2023-08-21,-12089,...,0.00,Large,0,0,0,0,0,1,0,1


In [7]:
df_hc = df >> filter(_.primary_agency_name.str.contains("Humboldt County"))

In [8]:
print(len(df_hc))

898


In [9]:
df_hc >> count(_.locode) >> arrange(-_.n)

Unnamed: 0,locode,n
0,5904,898


### What Prefix Codes are fequently used?

In [10]:
(_dla_utils.find_top(df_hc))>>filter(_.variable=="prefix")

Unnamed: 0,value,count,variable
0,ER,564,prefix
1,ACSTER,92,prefix
2,BPMP,66,prefix
3,ACSTP,42,prefix
4,BRLO,34,prefix
5,BRLS,26,prefix
6,HSIPL,24,prefix
7,RPSTPL,20,prefix
8,BRLOZB,10,prefix
9,HRRRL,7,prefix


In [11]:
# ER funds, or emergency relief funds, are the most obligated type of funding to the county.

In [12]:
df_hc >> filter(_.prefix=='ER')>> summarize(avgfunds= _.total_requested.sum())

Unnamed: 0,avgfunds
0,12255001


#### What Project IDs occur the most?

In [13]:
df_hc >> group_by(_.prefix) >> count(_.projectID) >> arrange(-_.n)


Unnamed: 0,prefix,projectID,n
17,ER,32L0,238
15,ER,20A0,204
0,ACSTER,32L0,79
9,BPMP,5904,66
19,ER,4400,54
10,BRLO,5904,34
20,ER,4820,31
18,ER,40A0,28
12,BRLS,5904,26
23,HSIPL,5904,24


In [14]:
_dla_utils.get_nunique(df_hc, 'projectID', 'prefix')

Unnamed: 0,prefix,n
9,ER,7
1,ACSTP,4
0,ACSTER,3
2,ATPL,1
3,ATPLNI,1
4,BPMP,1
5,BRLO,1
6,BRLOZB,1
7,BRLS,1
8,BRLSZD,1


In [15]:
df_hc >> group_by(_.prefix) >> count(_.project_no) >> arrange(-_.n)


Unnamed: 0,prefix,project_no,n
110,BRLO,5904(126),10
120,BRLS,5904(127),8
108,BRLO,5904(112),7
109,BRLO,5904(113),7
128,ER,20A0(002),7
...,...,...,...
304,FBDLNI,5904(130),1
312,HSIPL,5904(197),1
313,HSIPL,5904(198),1
320,RPSTPLE,5904(128),1


In [16]:
_dla_utils.get_nunique(df_hc, 'project_no', 'prefix')

Unnamed: 0,prefix,n
9,ER,180
0,ACSTER,58
1,ACSTP,32
4,BPMP,16
5,BRLO,7
12,HSIPL,7
7,BRLS,6
13,RPSTPL,6
6,BRLOZB,2
11,HRRRL,2


In [17]:
# interesting that when we group by the 7-digit project numberm, the top is no longer ER. 

In [18]:
df_hc >> filter(_.prefix=='BRLO', _.project_no=='5904(126)') 


Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
2512,Obligated,BRLO,5904(126),Humboldt County,2017-11-08,2018-02-06,2018-02-09,2018-02-09,2018-02-15,346000,...,0.0,Large,0,0,1,0,0,1,0,2
4407,Obligated,BRLO,5904(126),Humboldt County,2017-04-27,2017-05-10,2017-05-11,2017-05-15,2017-05-16,505000,...,0.0,Large,0,0,1,0,0,1,0,2
4866,Obligated,BRLO,5904(126),Humboldt County,2016-10-31,2017-03-03,2017-03-06,2017-03-06,2017-03-07,130000,...,0.0,Large,0,0,1,0,0,1,0,2
9958,Obligated,BRLO,5904(126),Humboldt County,2015-02-02,2015-02-03,2015-02-04,2015-02-04,2015-02-04,109000,...,0.0,Large,0,0,1,0,0,1,0,2
11835,Obligated,BRLO,5904(126),Humboldt County,2014-05-23,2014-05-23,2014-05-29,2014-05-29,2014-05-30,-109000,...,0.0,Large,0,0,1,0,0,1,0,2
13127,Obligated,BRLO,5904(126),Humboldt County,2019-03-20,2019-04-02,2019-04-11,2019-04-12,2019-04-15,108577,...,0.0,Large,0,0,1,0,0,1,0,2
15631,Obligated,BRLO,5904(126),Humboldt County,2020-01-22,2020-01-22,2020-01-30,2020-01-30,2020-01-31,-21691,...,0.0,Large,0,0,1,0,0,1,0,2
20785,Obligated,BRLO,5904(126),Humboldt County,2022-02-04,2022-02-04,2022-02-04,2022-02-04,2022-02-08,0,...,0.0,Large,0,0,1,0,0,1,0,2
23419,Obligated,BRLO,5904(126),Humboldt County,2023-02-15,2023-02-15,2023-02-15,2023-02-15,2023-02-23,-6381,...,0.0,Large,0,0,1,0,0,1,0,2
23537,Obligated,BRLO,5904(126),Humboldt County,2023-02-27,2023-02-27,2023-02-27,2023-02-27,2023-03-06,-6381,...,0.0,Large,0,0,1,0,0,1,0,2


##### Using funds from two sources for the same location

In [19]:
#trying something here 
(df_hc
     >> filter(_.project_location.str.contains("Jacoby Creek"))
     >> count(_.project_no)
)

Unnamed: 0,project_no,n
0,5904(126),10
1,5904(165),3


In [20]:
df_hc >> filter(_.project_location.str.contains("Jacoby Creek"))

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
941,Obligated,BRLO,5904(165),Humboldt County,2018-07-03,2018-07-06,2018-07-10,2018-07-10,2018-07-19,22655,...,0.0,Large,0,0,1,0,0,1,0,2
2512,Obligated,BRLO,5904(126),Humboldt County,2017-11-08,2018-02-06,2018-02-09,2018-02-09,2018-02-15,346000,...,0.0,Large,0,0,1,0,0,1,0,2
4407,Obligated,BRLO,5904(126),Humboldt County,2017-04-27,2017-05-10,2017-05-11,2017-05-15,2017-05-16,505000,...,0.0,Large,0,0,1,0,0,1,0,2
4866,Obligated,BRLO,5904(126),Humboldt County,2016-10-31,2017-03-03,2017-03-06,2017-03-06,2017-03-07,130000,...,0.0,Large,0,0,1,0,0,1,0,2
9958,Obligated,BRLO,5904(126),Humboldt County,2015-02-02,2015-02-03,2015-02-04,2015-02-04,2015-02-04,109000,...,0.0,Large,0,0,1,0,0,1,0,2
11835,Obligated,BRLO,5904(126),Humboldt County,2014-05-23,2014-05-23,2014-05-29,2014-05-29,2014-05-30,-109000,...,0.0,Large,0,0,1,0,0,1,0,2
13127,Obligated,BRLO,5904(126),Humboldt County,2019-03-20,2019-04-02,2019-04-11,2019-04-12,2019-04-15,108577,...,0.0,Large,0,0,1,0,0,1,0,2
15631,Obligated,BRLO,5904(126),Humboldt County,2020-01-22,2020-01-22,2020-01-30,2020-01-30,2020-01-31,-21691,...,0.0,Large,0,0,1,0,0,1,0,2
20386,Obligated,BRLO,5904(165),Humboldt County,2021-09-13,2021-11-18,2021-12-07,2021-12-13,2021-12-20,-2762,...,0.0,Large,0,0,1,0,0,1,0,2
20785,Obligated,BRLO,5904(126),Humboldt County,2022-02-04,2022-02-04,2022-02-04,2022-02-04,2022-02-08,0,...,0.0,Large,0,0,1,0,0,1,0,2


In [21]:
(df_hc >> filter(_.project_location.str.contains("Jacoby Creek")) 
     >> group_by(_.project_no)
     >> count(_.prepared_y)
     >> arrange(_.prepared_y)
)

Unnamed: 0,project_no,prepared_y,n
0,5904(126),2014.0,1
1,5904(126),2015.0,1
2,5904(126),2016.0,1
3,5904(126),2017.0,2
8,5904(165),2018.0,1
4,5904(126),2019.0,1
5,5904(126),2020.0,1
9,5904(165),2021.0,1
6,5904(126),2022.0,1
10,5904(165),2022.0,1


In [22]:
## we have two project numbers for the same bridge, with different funds around the same time

In [23]:
df_hc >> filter(_.prefix=='BRLS', _.project_no=='5904(127)') 


Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
2983,Obligated,BRLS,5904(127),Humboldt County,2017-07-13,2017-11-13,2017-11-20,2017-11-21,2017-11-30,2180287,...,0.0,Large,0,0,1,0,0,1,0,2
6674,Obligated,BRLS,5904(127),Humboldt County,2016-05-17,2016-05-25,2016-05-26,2016-05-26,2016-06-02,138992,...,0.0,Large,0,0,1,0,0,1,0,2
8726,Obligated,BRLS,5904(127),Humboldt County,2015-07-13,2015-07-13,2015-07-14,2015-07-14,2015-07-15,17706,...,0.0,Large,0,0,1,0,0,1,0,2
9950,Obligated,BRLS,5904(127),Humboldt County,2015-02-02,2015-02-04,2015-02-05,2015-02-05,2015-02-05,203619,...,0.0,Large,0,0,1,0,0,1,0,2
13123,Obligated,BRLS,5904(127),Humboldt County,2019-03-12,2019-04-02,2019-04-12,2019-04-15,2019-04-16,192163,...,0.0,Large,0,0,1,0,0,1,0,2
18516,Obligated,BRLS,5904(127),Humboldt County,2021-02-10,2021-02-10,2021-02-18,2021-02-25,2021-03-02,0,...,0.0,Large,0,0,1,0,0,1,0,2
19267,Obligated,BRLS,5904(127),Humboldt County,2021-04-29,2021-05-19,2021-05-28,2021-06-01,2021-06-15,-48480,...,0.0,Large,0,0,1,0,0,1,0,2
23894,Obligated,BRLS,5904(127),Humboldt County,2023-01-19,2023-04-19,2023-04-21,2023-04-24,2023-05-02,45602,...,0.0,Large,0,0,1,0,0,1,0,2


In [24]:
#trying this again with Williams Creek
(df_hc
     >> filter(_.project_location.str.contains("Williams Creek"))
     >> group_by(_.prefix, _.project_no, _.prepared_y)
     >> count(_.project_no)
)

Unnamed: 0,prefix,project_no,prepared_y,n
0,BRLOZB,5904(032),2015.0,1
1,BRLOZB,5904(032),2016.0,1
2,BRLOZB,5904(032),2017.0,2
3,BRLS,5904(127),2015.0,2
4,BRLS,5904(127),2016.0,1
5,BRLS,5904(127),2017.0,1
6,BRLS,5904(127),2019.0,1
7,BRLS,5904(127),2021.0,2
8,BRLS,5904(127),2023.0,1
9,BRLS,5904(164),2018.0,1


In [25]:
df_hc >> filter(_.project_location.str.contains("Williams Creek"))

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
557,Obligated,BRLS,5904(164),Humboldt County,2018-07-26,2018-07-26,2018-09-11,2018-09-12,2018-09-16,47480,...,0.0,Large,0,0,0,0,0,0,0,0
2983,Obligated,BRLS,5904(127),Humboldt County,2017-07-13,2017-11-13,2017-11-20,2017-11-21,2017-11-30,2180287,...,0.0,Large,0,0,1,0,0,1,0,2
3963,Obligated,BRLOZB,5904(032),Humboldt County,2017-07-06,2017-07-06,2017-07-06,2017-07-06,2017-07-17,-55,...,0.0,Large,0,0,0,0,0,1,0,1
4326,Obligated,BRLOZB,5904(032),Humboldt County,2017-05-16,2017-05-16,2017-05-16,2017-05-16,2017-05-24,0,...,0.0,Large,0,0,0,0,0,1,0,1
6674,Obligated,BRLS,5904(127),Humboldt County,2016-05-17,2016-05-25,2016-05-26,2016-05-26,2016-06-02,138992,...,0.0,Large,0,0,1,0,0,1,0,2
7518,Obligated,BRLOZB,5904(032),Humboldt County,2016-01-27,2016-01-27,2016-01-27,2016-01-27,2016-01-29,-180,...,0.0,Large,0,0,0,0,0,1,0,1
8726,Obligated,BRLS,5904(127),Humboldt County,2015-07-13,2015-07-13,2015-07-14,2015-07-14,2015-07-15,17706,...,0.0,Large,0,0,1,0,0,1,0,2
9102,Obligated,BRLOZB,5904(032),Humboldt County,2015-05-04,2015-05-21,2015-06-08,2015-06-08,2015-06-09,115438,...,0.0,Large,0,0,0,0,0,1,0,1
9950,Obligated,BRLS,5904(127),Humboldt County,2015-02-02,2015-02-04,2015-02-05,2015-02-05,2015-02-05,203619,...,0.0,Large,0,0,1,0,0,1,0,2
13123,Obligated,BRLS,5904(127),Humboldt County,2019-03-12,2019-04-02,2019-04-12,2019-04-15,2019-04-16,192163,...,0.0,Large,0,0,1,0,0,1,0,2


In [26]:
#interesting. we have a bridge reconstruction going on with funds coming from two programs in the same timeframe 

### Looking into the Sequences

In [27]:
df_hc >> group_by(_.projectID, _.prefix) >> count(_.seq) >> arrange(-_.n)

Unnamed: 0,projectID,prefix,seq,n
25,32L0,ER,1,58
3,20A0,ER,2,49
27,32L0,ER,3,46
4,20A0,ER,3,38
5,20A0,ER,4,36
...,...,...,...,...
119,5904,RPSTPL,1,1
126,5904,RPSTPL,8,1
127,5904,RPSTPL,9,1
128,5904,RPSTPLE,4,1


In [28]:
# making sure this query looks at the number of seq for each 7-digit project code

df_hc >> filter(_.prefix == "ER", _.project_no == "32L0(329)")


Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
251,Obligated,ER,32L0(329),Humboldt County,2018-10-19,2018-10-19,2018-11-02,2018-11-06,2018-11-11,26293,...,0.0,Large,0,0,0,0,0,1,0,1
16969,Obligated,ER,32L0(329),Humboldt County,2020-07-07,2020-07-21,2020-07-28,2020-08-03,2020-08-06,301505,...,0.0,Large,0,0,0,0,0,1,0,1
17851,Obligated,ER,32L0(329),Humboldt County,2020-10-19,2020-10-19,2020-10-19,2020-12-24,2021-01-06,3355955,...,-3513616.45,Large,0,0,0,0,0,1,0,1


In [29]:
# There was a double entry here before removing duplicates. 

In [30]:
df_hc >> count(_.mpo)

Unnamed: 0,mpo,n
0,NON-MPO,898


In [31]:
df_hc >> filter(_.seq >= 9) >> arrange(_.project_no)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
23536,Obligated,ER,20A0(002),Humboldt County,2023-03-13,2023-03-13,2023-03-13,2023-03-13,2023-03-22,-10,...,0.0,Large,0,0,0,0,0,1,0,1
24498,Obligated,ER,32L0(111),Humboldt County,2023-07-05,2023-07-05,2023-07-05,2023-07-05,2023-07-07,0,...,0.0,Large,0,0,0,1,0,0,0,1
22680,Obligated,ER,32L0(230),Humboldt County,2022-10-20,2022-10-20,2022-10-20,2022-10-20,2022-10-25,-2,...,0.0,Large,0,0,0,0,0,1,0,1
21860,Obligated,ER,32L0(326),Humboldt County,2022-07-20,2022-07-20,2022-07-20,2022-07-20,2022-07-21,0,...,0.0,Large,0,0,0,0,0,1,0,1
5280,Obligated,BRLSZD,5904(006),Humboldt County,2016-12-08,2016-12-08,2016-12-08,2016-12-08,2016-12-14,-228,...,0.0,Large,0,0,1,0,0,1,0,2
9844,Obligated,BRLSZD,5904(006),Humboldt County,2014-05-30,2015-02-04,2015-02-05,2015-02-20,2015-02-20,-27390,...,0.0,Large,0,0,1,0,0,1,0,2
3963,Obligated,BRLOZB,5904(032),Humboldt County,2017-07-06,2017-07-06,2017-07-06,2017-07-06,2017-07-17,-55,...,0.0,Large,0,0,0,0,0,1,0,1
4326,Obligated,BRLOZB,5904(032),Humboldt County,2017-05-16,2017-05-16,2017-05-16,2017-05-16,2017-05-24,0,...,0.0,Large,0,0,0,0,0,1,0,1
7518,Obligated,BRLOZB,5904(032),Humboldt County,2016-01-27,2016-01-27,2016-01-27,2016-01-27,2016-01-29,-180,...,0.0,Large,0,0,0,0,0,1,0,1
5412,Obligated,RPSTPL,5904(106),Humboldt County,2016-11-17,2016-11-17,2016-11-17,2016-11-17,2016-11-18,0,...,0.0,Large,1,0,0,0,0,0,0,1


In [32]:
#curious to see where/when the sequences fall on a timeline for different projects
(alt.Chart(df_hc>> filter(_.seq >= 8)).mark_bar().encode(x="prepared_y", y = "seq", color="project_no"))


### ER Funds

#### Using ER funds for Mattole Road 

In [33]:
#just saw "Mattole Road" on the first entry. Searching using this string. 
df_hc >> filter(_.project_location.str.contains("Mattole R"))

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499,...,0.00,Large,0,0,0,0,0,1,0,1
61,Obligated,BPMP,5904(157),Humboldt County,2018-11-13,2018-12-06,2018-12-14,2018-12-14,2018-12-14,21619,...,0.00,Large,0,0,1,0,0,1,0,2
179,Obligated,ER,32L0(132),Humboldt County,2018-11-09,2018-11-19,2018-11-19,2018-11-21,2018-11-28,80597,...,0.00,Large,0,0,0,1,0,1,0,2
180,Obligated,ER,32L0(136),Humboldt County,2018-11-07,2018-11-19,2018-11-19,2018-11-21,2018-11-28,7797,...,0.00,Large,0,0,0,0,0,1,0,1
182,Obligated,ER,32L0(164),Humboldt County,2018-11-15,2018-11-19,2018-11-19,2018-11-21,2018-11-28,55422,...,0.00,Large,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24498,Obligated,ER,32L0(111),Humboldt County,2023-07-05,2023-07-05,2023-07-05,2023-07-05,2023-07-07,0,...,0.00,Large,0,0,0,1,0,0,0,1
24623,Obligated,ACSTER,32L0(114),Humboldt County,2023-05-30,2023-07-05,2023-07-06,2023-08-16,2023-08-21,0,...,43534.50,Large,0,0,0,0,0,1,0,1
24625,Obligated,BRLS,5904(024),Humboldt County,2023-03-28,2023-06-07,2023-08-02,2023-08-02,2023-08-03,16725751,...,0.00,Large,0,0,1,0,0,1,0,2
25049,Obligated,ACSTER,32L0(114),Humboldt County,2023-05-30,2023-07-05,2023-07-06,2023-08-16,2023-08-21,0,...,43534.50,Large,0,0,0,0,0,1,0,1


In [34]:
(df_hc >> filter(_.project_location.str.contains("Mattole R"))
     >> count(_.prefix))

Unnamed: 0,prefix,n
0,ACSTER,37
1,ACSTP,7
2,BPMP,6
3,BRLS,5
4,ER,180


In [35]:
(df_hc >> filter(_.project_location.str.contains("Mattole R"))
     >> group_by(_.prefix, _.prepared_y)
     >> count(_.project_no)
     >> arrange(-_.n)
)

Unnamed: 0,prefix,prepared_y,project_no,n
44,BPMP,2022.00,5904(157),3
128,ER,2020.00,4400(017),3
30,ACSTER,2022.00,40A0(059),2
31,ACSTER,2023.00,32L0(114),2
33,ACSTER,2023.00,40A0(059),2
...,...,...,...,...
174,ER,2023.00,32L0(119),1
176,ER,2023.00,32L0(252),1
177,ER,2023.00,32L0(253),1
179,ER,2023.00,40A0(015),1


In [36]:
(df_hc >> filter(_.project_location.str.contains("Mattole R"))
     >> group_by(_.prepared_y, _.prefix)
     >> count(_.type_of_work)
     >> arrange(-_.n)
)

Unnamed: 0,prepared_y,prefix,type_of_work,n
21,2018.00,ER,Storm Damage Restoration,26
38,2019.00,ER,Permanent Restoration,18
83,2023.00,ER,Storm Damage Restoration,16
19,2018.00,ER,Permanent Restoration,15
71,2022.00,ER,Permanent Restoration,12
...,...,...,...,...
75,2022.00,ER,Trench Stabilization Per Geotechnical Report,1
78,2023.00,ER,Design Wire Wall Per Geotechnical Investigation,1
79,2023.00,ER,"Grind Existing Roadway, Recompact/augment Road...",1
82,2023.00,ER,Stabilize Roadway With Earth Retaining Structu...,1


#### ER project numbers

In [37]:
df_hc >> filter(_.prefix=="ER") >> count(_.projectID) >> arrange(-_.n)

Unnamed: 0,projectID,n
3,32L0,238
1,20A0,204
5,4400,54
6,4820,31
4,40A0,28
2,28L4,6
0,15J7,3


In [38]:
df_hc >> filter(_.prefix == "ER") >> count(_.project_no) >> arrange(-_.n)

Unnamed: 0,project_no,n
4,20A0(002),7
10,20A0(008),6
11,20A0(009),6
12,20A0(010),6
14,20A0(012),6
...,...,...
144,4400(020),1
162,4400(062),1
163,4400(071),1
167,4820(007),1


### What projects are grouped by date

In [39]:
df_hc.sample(3)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
8991,Obligated,BRLOZB,5904(094),Humboldt County,2015-05-29,2015-06-11,2015-06-12,2015-06-12,2015-06-15,-176955,...,0.0,Large,0,0,1,0,0,1,0,2
20617,Obligated,ER,32L0(283),Humboldt County,2022-01-03,2022-01-10,2022-01-18,2022-01-25,2022-01-27,-2132,...,0.0,Large,0,0,0,0,0,1,0,1
8778,Obligated,ER,20A0(013),Humboldt County,2015-07-08,2015-07-08,2015-07-09,2015-07-09,2015-07-09,1946,...,0.0,Large,0,0,0,0,0,1,0,1


In [40]:
(df_hc
    >> count(_.prepared_date) 
    >> arrange(-_.n)
    >> filter(_.n >= 5)
)

Unnamed: 0,prepared_date,n
354,2022-02-04,38
119,2018-03-22,17
120,2018-03-29,16
10,2014-05-22,13
409,2023-02-13,12
209,2019-09-04,10
141,2018-07-30,9
231,2019-11-04,9
413,2023-02-23,9
117,2018-03-19,8


#### March 22, 2018: 17 ER Entries

In [41]:
(df_hc
    >> filter(_.prepared_date=='2018-03-22') 
    >> count(_.prefix)
    >> arrange(-_.n)
)

Unnamed: 0,prefix,n
0,ER,17


In [42]:
(df_hc
    >> filter(_.prepared_date=='2018-03-22') 
)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
1767,Obligated,ER,4400(050),Humboldt County,2018-03-22,2018-03-22,2018-04-27,2018-04-27,2018-05-01,-899,...,0.0,Large,0,0,0,0,0,1,0,1
1840,Obligated,ER,20A0(053),Humboldt County,2018-03-22,2018-03-23,2018-04-09,2018-04-09,2018-04-20,-14591,...,0.0,Large,0,0,0,0,0,1,0,1
1913,Obligated,ER,20A0(042),Humboldt County,2018-03-22,2018-03-23,2018-04-06,2018-04-06,2018-04-16,5183,...,0.0,Large,0,0,0,0,0,1,0,1
1914,Obligated,ER,20A0(048),Humboldt County,2018-03-22,2018-03-23,2018-04-09,2018-04-09,2018-04-16,-17213,...,0.0,Large,0,0,0,0,0,1,0,1
1915,Obligated,ER,20A0(051),Humboldt County,2018-03-22,2018-03-23,2018-04-09,2018-04-09,2018-04-16,-17493,...,0.0,Large,0,0,0,0,0,1,0,1
1917,Obligated,ER,20A0(017),Humboldt County,2018-03-22,2018-03-23,2018-04-09,2018-04-09,2018-04-16,-91981,...,0.0,Large,0,0,0,0,0,1,0,1
1961,Obligated,ER,20A0(055),Humboldt County,2018-03-22,2018-03-23,2018-04-03,2018-04-03,2018-04-13,7261,...,0.0,Large,0,0,0,0,0,1,0,1
2011,Obligated,ER,20A0(028),Humboldt County,2018-03-22,2018-03-23,2018-04-02,2018-04-02,2018-04-10,2645,...,0.0,Large,0,0,0,0,0,1,0,1
2013,Obligated,ER,20A0(031),Humboldt County,2018-03-22,2018-03-23,2018-04-02,2018-04-02,2018-04-10,7602,...,0.0,Large,0,0,0,0,0,1,0,1
2015,Obligated,ER,20A0(037),Humboldt County,2018-03-22,2018-03-23,2018-04-03,2018-04-03,2018-04-10,11398,...,0.0,Large,0,0,0,0,0,1,0,1


* Most of these are for Mattole Road, similar types of works, same phase, different sectiopns of the road

In [43]:
#checking to see if there were doubles in project location
(df_hc
    >> filter(_.prepared_date=='2018-03-22') 
    >> count(_.project_location)
)

Unnamed: 0,project_location,n
0,Alderpoint Road Pm 42.00 To 42.46,1
1,"Alderpoint Road, Pm 36.98",1
2,"Kneeland Road, Pm 0.26",1
3,"Kneeland Road, Pm 6.45",1
4,"Mattole Road (bull Creek), Pm 3.55",1
5,"Mattole Road, Pm 13.67",1
6,"Mattole Road, Pm 17.83",1
7,"Mattole Road, Pm 2.11",1
8,"Mattole Road, Pm 26.96",1
9,"Mattole Road, Pm 27.10",1


In [44]:
(df_hc
    >> filter(_.prepared_date=='2018-03-22') 
    >> count(_.type_of_work)
)

Unnamed: 0,type_of_work,n
0,Emergency Relief,1
1,Permanent Restoration,4
2,Storm Damage Restoration,12


In [45]:
(df_hc 
    >> group_by(_.prefix)
    >> count(_.prepared_y) 
    >> arrange(-_.n)
    >> filter(_.n > 5)
)

Unnamed: 0,prefix,prepared_y,n
59,ER,2018.0,146
64,ER,2023.0,94
63,ER,2022.0,86
60,ER,2019.0,82
61,ER,2020.0,47
0,ACSTER,2019.0,39
56,ER,2015.0,31
62,ER,2021.0,31
55,ER,2014.0,30
3,ACSTER,2022.0,20


In [46]:
## looking for more instances of this project code 
df_hc >> filter(_.prefix == "ER", _.project_no == '32L0(434)')

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
14874,Obligated,ER,32L0(434),Humboldt County,2019-10-03,2019-10-03,2019-10-07,2019-10-09,2019-10-15,4337,...,-4596.76,Large,0,0,0,0,0,1,0,1
15622,Obligated,ER,32L0(434),Humboldt County,2019-11-05,2019-11-05,2020-01-08,2020-01-16,2020-01-21,-4337,...,0.0,Large,0,0,0,0,0,1,0,1


In [47]:
(df_hc 
    >> filter(_.prefix == "ER")
    >> group_by(_.prepared_y)
)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499,...,0.00,Large,0,0,0,0,0,1,0,1
179,Obligated,ER,32L0(132),Humboldt County,2018-11-09,2018-11-19,2018-11-19,2018-11-21,2018-11-28,80597,...,0.00,Large,0,0,0,1,0,1,0,2
180,Obligated,ER,32L0(136),Humboldt County,2018-11-07,2018-11-19,2018-11-19,2018-11-21,2018-11-28,7797,...,0.00,Large,0,0,0,0,0,1,0,1
181,Obligated,ER,32L0(138),Humboldt County,2018-11-16,2018-11-19,2018-11-19,2018-11-21,2018-11-28,11764,...,0.00,Large,0,0,0,0,0,1,0,1
182,Obligated,ER,32L0(164),Humboldt County,2018-11-15,2018-11-19,2018-11-19,2018-11-21,2018-11-28,55422,...,0.00,Large,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24498,Obligated,ER,32L0(111),Humboldt County,2023-07-05,2023-07-05,2023-07-05,2023-07-05,2023-07-07,0,...,0.00,Large,0,0,0,1,0,0,0,1
24627,Obligated,ER,32L0(125),Humboldt County,2023-05-19,2023-05-24,2023-05-25,2023-08-16,2023-08-21,-12089,...,0.00,Large,0,0,0,0,0,1,0,1
24629,Obligated,ER,32L0(295),Humboldt County,2023-08-24,2023-08-24,2023-08-24,2023-08-24,2023-08-28,0,...,0.00,Large,0,0,0,0,0,1,0,1
25054,Obligated,ER,32L0(125),Humboldt County,2023-05-19,2023-05-24,2023-05-25,2023-08-16,2023-08-21,-12089,...,0.00,Large,0,0,0,0,0,1,0,1


### Looking at Projects with 32L0


In [48]:
df_hc >> filter(_.projectID == "32L0")
             

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
179,Obligated,ER,32L0(132),Humboldt County,2018-11-09,2018-11-19,2018-11-19,2018-11-21,2018-11-28,80597,...,0.00,Large,0,0,0,1,0,1,0,2
180,Obligated,ER,32L0(136),Humboldt County,2018-11-07,2018-11-19,2018-11-19,2018-11-21,2018-11-28,7797,...,0.00,Large,0,0,0,0,0,1,0,1
181,Obligated,ER,32L0(138),Humboldt County,2018-11-16,2018-11-19,2018-11-19,2018-11-21,2018-11-28,11764,...,0.00,Large,0,0,0,0,0,1,0,1
182,Obligated,ER,32L0(164),Humboldt County,2018-11-15,2018-11-19,2018-11-19,2018-11-21,2018-11-28,55422,...,0.00,Large,0,0,0,0,0,1,0,1
183,Obligated,ER,32L0(219),Humboldt County,2018-11-07,2018-11-19,2018-11-19,2018-11-21,2018-11-28,13724,...,0.00,Large,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24627,Obligated,ER,32L0(125),Humboldt County,2023-05-19,2023-05-24,2023-05-25,2023-08-16,2023-08-21,-12089,...,0.00,Large,0,0,0,0,0,1,0,1
24629,Obligated,ER,32L0(295),Humboldt County,2023-08-24,2023-08-24,2023-08-24,2023-08-24,2023-08-28,0,...,0.00,Large,0,0,0,0,0,1,0,1
25049,Obligated,ACSTER,32L0(114),Humboldt County,2023-05-30,2023-07-05,2023-07-06,2023-08-16,2023-08-21,0,...,43534.50,Large,0,0,0,0,0,1,0,1
25054,Obligated,ER,32L0(125),Humboldt County,2023-05-19,2023-05-24,2023-05-25,2023-08-16,2023-08-21,-12089,...,0.00,Large,0,0,0,0,0,1,0,1


In [49]:
df_hc >> filter(_.projectID == "32L0", _.type_of_work.str.contains("Roadway"))

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
179,Obligated,ER,32L0(132),Humboldt County,2018-11-09,2018-11-19,2018-11-19,2018-11-21,2018-11-28,80597,...,0.0,Large,0,0,0,1,0,1,0,2
1126,Obligated,ER,32L0(111),Humboldt County,2018-06-27,2018-06-27,2018-06-27,2018-06-27,2018-07-03,7657,...,-8262.69,Large,0,0,0,1,0,0,0,1
1130,Obligated,ER,32L0(127),Humboldt County,2018-06-25,2018-06-25,2018-06-28,2018-06-29,2018-07-03,23672,...,-25544.51,Large,0,0,0,1,0,1,0,2
2218,Obligated,ACSTP,32L0(127),Humboldt County,2018-01-19,2018-02-16,2018-03-06,2018-03-06,2018-03-16,0,...,25544.51,Large,0,0,0,1,0,1,0,2
2245,Obligated,ACSTP,32L0(111),Humboldt County,2018-01-19,2018-02-16,2018-03-06,2018-03-06,2018-03-15,0,...,8262.69,Large,0,0,0,1,0,0,0,1
13035,Obligated,ER,32L0(117),Humboldt County,2018-12-20,2018-12-21,2019-01-22,2019-02-25,2019-02-28,23929,...,0.0,Large,0,0,0,1,0,1,0,2
13037,Obligated,ACSTP,32L0(128),Humboldt County,2019-03-12,2019-03-12,2019-05-10,2019-05-20,2019-05-22,0,...,104012.8,Large,0,0,0,1,0,1,0,2
13038,Obligated,ER,32L0(134),Humboldt County,2019-01-14,2019-01-14,2019-02-25,2019-03-04,2019-03-06,50649,...,0.0,Large,0,0,0,1,0,1,0,2
13043,Obligated,ER,32L0(173),Humboldt County,2019-01-31,2019-02-05,2019-03-19,2019-03-26,2019-03-27,8788,...,0.0,Large,0,0,0,1,0,1,0,2
13054,Obligated,ER,32L0(293),Humboldt County,2019-01-09,2019-01-10,2019-02-15,2019-02-15,2019-02-26,6781,...,0.0,Large,0,0,0,1,0,1,0,2


In [50]:
df_hc_prefix_count = df_hc >> filter(_.projectID == "32L0") >> count(_.prefix) >> arrange(-_.n)

In [51]:
df_hc_prefix_count

Unnamed: 0,prefix,n
2,ER,238
0,ACSTER,79
1,ACSTP,15


In [52]:
(alt.Chart(df_hc_prefix_count).mark_bar().encode(
    x = "prefix",
    y = "n"))

In [53]:
df_hc >> filter(_.projectID == "32L0") >> count(_.seq) >> arrange(-_.n)

Unnamed: 0,seq,n
0,1,71
1,2,71
2,3,57
3,4,51
4,5,38
5,6,21
6,7,12
7,8,8
8,9,3


### Looking at Projects with 5904

In [54]:
df_hc >> filter(_.projectID == "5904")

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0,...,0.00,Large,0,0,1,0,0,0,0,1
61,Obligated,BPMP,5904(157),Humboldt County,2018-11-13,2018-12-06,2018-12-14,2018-12-14,2018-12-14,21619,...,0.00,Large,0,0,1,0,0,1,0,2
277,Obligated,BPMP,5904(121),Humboldt County,2018-11-02,2018-11-02,2018-11-02,2018-11-06,2018-11-07,-81905,...,0.00,Large,0,0,1,0,0,0,0,1
278,Obligated,RPSTPL,5904(143),Humboldt County,2018-07-06,2018-10-01,2018-11-06,2018-11-07,2018-11-07,550000,...,0.00,Large,1,0,0,0,0,0,0,1
467,Obligated,BPMP,5904(135),Humboldt County,2018-09-28,2018-09-28,2018-09-28,2018-10-10,2018-10-15,-16797,...,0.00,Large,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24915,HQ,HSIPL,5904(197),Humboldt County,2023-06-29,2023-09-05,NaT,NaT,NaT,10800,...,0.00,Large,1,0,0,1,0,1,0,3
24916,HQ,HSIPL,5904(198),Humboldt County,2023-06-29,2023-09-05,NaT,NaT,NaT,72000,...,0.00,Large,0,0,0,1,0,1,0,2
25039,Obligated,BRLO,5904(167),Humboldt County,2023-02-03,2023-07-03,2023-08-02,2023-08-02,2023-08-03,0,...,41083.77,Large,0,0,0,0,0,0,0,0
25047,Obligated,ATPLNI,5904(152),Humboldt County,2023-08-23,2023-08-23,2023-08-23,2023-08-23,2023-08-28,0,...,0.00,Large,0,0,0,0,0,0,0,0


In [55]:
df_hc >> filter(_.projectID == "5904", _.type_of_work.str.contains("Bridge"))

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0,...,0.00,Large,0,0,1,0,0,0,0,1
61,Obligated,BPMP,5904(157),Humboldt County,2018-11-13,2018-12-06,2018-12-14,2018-12-14,2018-12-14,21619,...,0.00,Large,0,0,1,0,0,1,0,2
277,Obligated,BPMP,5904(121),Humboldt County,2018-11-02,2018-11-02,2018-11-02,2018-11-06,2018-11-07,-81905,...,0.00,Large,0,0,1,0,0,0,0,1
467,Obligated,BPMP,5904(135),Humboldt County,2018-09-28,2018-09-28,2018-09-28,2018-10-10,2018-10-15,-16797,...,0.00,Large,0,0,1,0,0,0,0,1
468,Obligated,BPMP,5904(139),Humboldt County,2018-08-31,2018-10-01,2018-10-10,2018-10-12,2018-10-15,-123758,...,0.00,Large,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23891,Obligated,BRLO,5904(112),Humboldt County,2023-03-21,2023-04-19,2023-05-22,2023-05-22,2023-05-24,2022500,...,0.00,Large,0,0,1,0,0,1,0,2
23894,Obligated,BRLS,5904(127),Humboldt County,2023-01-19,2023-04-19,2023-04-21,2023-04-24,2023-05-02,45602,...,0.00,Large,0,0,1,0,0,1,0,2
24489,Obligated,BPMP,5904(155),Humboldt County,2023-07-11,2023-07-11,2023-07-11,2023-07-11,2023-07-13,-76254,...,0.00,Large,0,0,1,0,0,1,0,2
24625,Obligated,BRLS,5904(024),Humboldt County,2023-03-28,2023-06-07,2023-08-02,2023-08-02,2023-08-03,16725751,...,0.00,Large,0,0,1,0,0,1,0,2


In [56]:
## 88 rows of all bridge related funding obligations- appears from this small subset that HBPLOCAL is the FTIP Code, 
### yet, we have various Prefix Codes.

In [57]:
(df_hc 
    >> filter(_.projectID == "5904", _.type_of_work.str.contains("Bridge"))
    >> count(_.project_no) >> arrange(-_.n)
)

Unnamed: 0,project_no,n
11,5904(126),10
12,5904(127),8
3,5904(112),7
4,5904(113),7
5,5904(118),6
16,5904(139),6
18,5904(141),6
20,5904(149),6
23,5904(156),6
24,5904(157),6


In [58]:
## mplotting to see the timeline of one project 5904(126)
(alt.Chart(df_hc >> filter(_.project_no == "5904(126)")).mark_bar().encode(
    x= "prepared_date", 
    y= "total_requested"))